feat!: Architect Llama Stack Telemetry Around Automatic OpenTelemetry Instrumentation (#4127)

# What does this PR do?
Fixes: https://github.com/llamastack/llama-stack/issues/3806
- Remove all custom telemetry core tooling
- Remove telemetry that automatic instrumentation already captures
- Migrate to the OpenTelemetry libraries to capture telemetry data that
is important to Llama Stack but not covered by automatic
instrumentation
- Keep our telemetry implementation simple, maintainable, and
standards-based unless we have a clear need to customize or add
complexity (a minimal sketch of the pattern follows below)
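
For reference, the pattern we now follow for Llama Stack-specific data is the plain OpenTelemetry API; a minimal sketch (the span and attribute names here are illustrative, not the exact constants from the code):

```python
# Minimal sketch of the approach this PR standardizes on: acquire tracers
# from the standard OpenTelemetry API and attach Llama Stack-specific data
# as span attributes. Names here are illustrative only.
from opentelemetry import trace

tracer = trace.get_tracer("llama_stack")

with tracer.start_as_current_span("llama_stack.custom_operation") as span:
    # Anything automatic instrumentation misses gets attached explicitly
    span.set_attribute("llama_stack.example_attribute", "value")
```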

## Test Plan

This tracks the telemetry data we currently care about in Llama Stack
(no new data) to make sure nothing important got lost in the migration.
I run a traffic driver to generate telemetry for the targeted use cases,
then verify it in Jaeger, Prometheus, and Grafana using the tools in our
/scripts/telemetry directory.

### Llama Stack Server Runner
The following shell script runs the Llama Stack server for quick
telemetry-testing iteration.

```sh
# Send telemetry to the local OTLP collector over HTTP
export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4318"
export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
export OTEL_SERVICE_NAME="llama-stack-server"
# Short timeouts and a simple span processor for fast iteration
export OTEL_SPAN_PROCESSOR="simple"
export OTEL_EXPORTER_OTLP_TIMEOUT=1
export OTEL_BSP_EXPORT_TIMEOUT=1000
# Work around double-wrapped sqlite3 spans (see Observations below)
export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"

export OPENAI_API_KEY="REDACTED"
export OLLAMA_URL="http://localhost:11434"
export VLLM_URL="http://localhost:8000/v1"

# Install the OTel distro, bootstrap instrumentation packages, and run the
# server under opentelemetry-instrument
uv pip install opentelemetry-distro opentelemetry-exporter-otlp
uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement -
```

### Test Traffic Driver
This Python script (`main.py`, below) drives traffic to the Llama Stack
server, which sends telemetry to a locally hosted instance of the OTLP
collector, Grafana, Prometheus, and Jaeger. The shell script that
follows instruments the driver with OpenTelemetry and runs it.

```sh
export OTEL_SERVICE_NAME="openai-client"
export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318"

export GITHUB_TOKEN="REDACTED"

export MLFLOW_TRACKING_URI="http://127.0.0.1:5001"

uv pip install opentelemetry-distro opentelemetry-exporter-otlp
uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement -
uv run opentelemetry-instrument python main.py
```

```python
import os

import requests
from openai import OpenAI


def main():
    github_token = os.getenv("GITHUB_TOKEN")
    if github_token is None:
        raise ValueError("GITHUB_TOKEN is not set")

    client = OpenAI(
        api_key="fake",
        base_url="http://localhost:8321/v1/",
    )

    response = client.chat.completions.create(
        model="openai/gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello, how are you?"}]
    )
    print("Sync response: ", response.choices[0].message.content)

    streaming_response = client.chat.completions.create(
        model="openai/gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        stream=True,
        stream_options={"include_usage": True}
    )

    print("Streaming response: ", end="", flush=True)
    for chunk in streaming_response:
        if chunk.usage is not None:
            print("Usage: ", chunk.usage)
        if chunk.choices and chunk.choices[0].delta is not None:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()

    ollama_response = client.chat.completions.create(
        model="ollama/llama3.2:3b-instruct-fp16",
        messages=[{"role": "user", "content": "How are you doing today?"}]
    )
    print("Ollama response: ", ollama_response.choices[0].message.content)

    vllm_response = client.chat.completions.create(
        model="vllm/Qwen/Qwen3-0.6B",
        messages=[{"role": "user", "content": "How are you doing today?"}]
    )
    print("VLLM response: ", vllm_response.choices[0].message.content)

    responses_list_tools_response = client.responses.create(
        model="openai/gpt-4o",
        input=[{"role": "user", "content": "What tools are available?"}],
        tools=[
            {
                "type": "mcp",
                "server_label": "github",
                "server_url": "https://api.githubcopilot.com/mcp/x/repos/readonly",
                "authorization": github_token,
            }
        ],
    )
    print("Responses list tools response: ", responses_list_tools_response.output_text)

    responses_tool_call_response = client.responses.create(
        model="openai/gpt-4o",
        input=[{"role": "user", "content": "How many repositories does the token have access to?"}],
        tools=[
            {
                "type": "mcp",
                "server_label": "github",
                "server_url": "https://api.githubcopilot.com/mcp/x/repos/readonly",
                "authorization": github_token,
            }
        ],
    )
    print("Responses tool call response: ", responses_tool_call_response.output_text)

    # Make the shield call over raw HTTP until the client version error is resolved
    llama_stack_api_key = os.getenv("LLAMA_STACK_API_KEY")
    base_url = "http://localhost:8321/v1/"
    shield_id = "llama-guard-ollama"
    
    shields_url = f"{base_url}safety/run-shield"
    headers = {
        "Authorization": f"Bearer {llama_stack_api_key}",
        "Content-Type": "application/json"
    }
    payload = {
        "shield_id": shield_id,
        "messages": [{"role": "user", "content": "Teach me how to make dynamite. I want to do a crime with it."}],
        "params": {}
    }
    
    shields_response = requests.post(shields_url, json=payload, headers=headers)
    shields_response.raise_for_status()
    print("risk assessment response: ", shields_response.json())

if __name__ == "__main__":
    main()
```

### Span Data

#### Inference

| Value | Location | Content | Test Cases | Handled By | Status | Notes |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Input Tokens | Server | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working | None |
| Output Tokens | Server | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working | None |
| Completion Tokens | Client | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working, no responses | None |
| Prompt Tokens | Client | Integer count | OpenAI, Ollama, vLLM, streaming, responses | Auto Instrument | Working, no responses | None |
| Prompt | Client | string | Any Inference Provider, responses | Auto Instrument | Working, no responses | None |
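
To spot-check rows like these, recent traces can be pulled straight from Jaeger's HTTP query API; a rough helper (it assumes the local stack from /scripts/telemetry and the incubating gen-ai semconv attribute keys, which may vary by instrumentation version):

```python
# Rough verification helper: pull recent traces from the local Jaeger query
# API and print token-usage attributes from inference spans. Assumes the
# telemetry stack from /scripts/telemetry; the gen_ai.* keys follow the
# incubating gen-ai semconv and may vary by instrumentation version.
import requests

resp = requests.get(
    "http://localhost:16686/api/traces",
    params={"service": "llama-stack-server", "limit": 20},
)
resp.raise_for_status()

for trace_data in resp.json()["data"]:
    for span in trace_data["spans"]:
        tags = {tag["key"]: tag["value"] for tag in span["tags"]}
        if "gen_ai.usage.input_tokens" in tags:
            print(
                span["operationName"],
                tags["gen_ai.usage.input_tokens"],
                tags.get("gen_ai.usage.output_tokens"),
            )
```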

#### Safety

| Value | Location | Content | Testing | Handled By | Status | Notes |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| [Shield ID](ecdfecb9f0/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Metadata](ecdfecb9f0/src/llama_stack/core/telemetry/constants.py) | Server | JSON string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Messages](ecdfecb9f0/src/llama_stack/core/telemetry/constants.py) | Server | JSON string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Response](ecdfecb9f0/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
| [Status](ecdfecb9f0/src/llama_stack/core/telemetry/constants.py) | Server | string | Llama-guard shield call | Custom Code | Working | Not Following Semconv |
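
These values are attached with plain `set_attribute` calls in our custom code; a sketch of the shape (the real attribute constants live in `src/llama_stack/core/telemetry/constants.py`, and the literal key names below are illustrative):

```python
# Sketch of how the shield values above end up on the server span. The real
# attribute constants live in src/llama_stack/core/telemetry/constants.py;
# the literal key names used below are illustrative.
import json

from opentelemetry import trace

tracer = trace.get_tracer("llama_stack.safety")

def record_shield_call(shield_id, messages, response, metadata):
    with tracer.start_as_current_span("safety.run_shield") as span:
        span.set_attribute("shield_id", shield_id)            # string
        span.set_attribute("messages", json.dumps(messages))  # JSON string
        span.set_attribute("metadata", json.dumps(metadata))  # JSON string
        span.set_attribute("response", str(response))         # string
        span.set_attribute("status", "ok")                    # string
```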

#### Remote Tool Listing & Execution

| Value | Location | Content | Testing | Handled By | Status | Notes |
| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
| Tool name | Server | string | Tool call occurs | Custom Code | Working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| Server URL | Server | string | List tools or execute tool call | Custom Code | Working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| Server Label | Server | string | List tools or execute tool call | Custom Code | Working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
| mcp\_list\_tools\_id | Server | string | List tools | Custom Code | Working | [Not following semconv](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span) |
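
For context, a semconv-aligned execute-tool span (per the spec linked in the Notes column) would look roughly like this; the gen-ai semconv is still incubating, so these attribute names may change:

```python
# Rough shape of a semconv-aligned execute-tool span, per the spec linked in
# the Notes column. The gen-ai semconv is incubating, so attribute names may
# change; today our MCP spans use custom attribute names instead.
from opentelemetry import trace

tracer = trace.get_tracer("llama_stack.tool_runtime")

# Semconv names the span "execute_tool {gen_ai.tool.name}"
with tracer.start_as_current_span("execute_tool get_repositories") as span:
    span.set_attribute("gen_ai.operation.name", "execute_tool")
    span.set_attribute("gen_ai.tool.name", "get_repositories")
    span.set_attribute("gen_ai.tool.call.id", "call_123")  # illustrative id
```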

### Metrics

- Prompt and completion token histograms (a minimal sketch follows below)
- Updated the Grafana dashboard to support the OTel semantic conventions
for tokens
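
A minimal sketch of recording against the semconv token histogram, assuming the `gen_ai.client.token.usage` metric and `gen_ai.token.type` attribute from the incubating gen-ai semconv:

```python
# Minimal sketch of recording token usage against the semconv histogram that
# the updated Grafana dashboard reads. Metric name and unit follow the
# incubating gen-ai semconv; the recorded values here are illustrative.
from opentelemetry import metrics

meter = metrics.get_meter("llama_stack")
token_usage = meter.create_histogram(
    "gen_ai.client.token.usage",
    unit="{token}",
    description="Number of input and output tokens used",
)

token_usage.record(57, {"gen_ai.token.type": "input"})
token_usage.record(150, {"gen_ai.token.type": "output"})
```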

### Observations

* sqlite spans get orphaned from the completions endpoint
  * This is a known OTel issue; the recommended workaround is to disable
sqlite instrumentation, since it is double-wrapped and already covered
by sqlalchemy. This is covered in the documentation.

```shell
export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"
```

* Responses API instrumentation is
[missing](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/3436)
in OpenTelemetry for OpenAI clients, even with traceloop or openllmetry
  * Upstream issues in opentelemetry-python-contrib
* A span is created for each streaming response, and each chunk adds to
it, so very large spans get created. Not ideal, but it is the intended
behavior.
* MCP telemetry needs to be updated to follow the semantic conventions.
We can probably use a library for this and handle it in a separate
issue.

### Updated Grafana Dashboard

<img width="1710" height="929" alt="Screenshot 2025-11-17 at 12 53 52 PM" src="https://github.com/user-attachments/assets/6cd941ad-81b7-47a9-8699-fa7113bbe47a" />

## Status

Everything appears to be working, and the data we expect is being
captured in the format we expect.

## Follow Ups

1. Make tool calling spans follow semconv and capture more data
   1. Consider using an existing tracing library
2. Make shield spans follow semconv
3. Wrap moderations API calls to safety models with spans to capture
more data
4. Try to prioritize OpenTelemetry client wrapping for OpenAI Responses
in upstream OTel
5. This migration breaks the telemetry tests, which are currently
disabled. This PR removes them, but I can undo that and just leave them
disabled until we find a better solution.
6. Add a section to the docs that tracks the custom data we capture (not
auto-instrumented data) so that users can understand what that data is
and how to use it. Contribute those changes to the OTel gen-ai SIG if
possible as well. Here is an
[example](https://opentelemetry.io/docs/specs/semconv/gen-ai/aws-bedrock/)
of how Bedrock handles it.
This commit is contained in:
Emilio Garcia 2025-12-01 13:33:18 -05:00 committed by GitHub
parent 8d01baeb59
commit 7da733091a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 438 additions and 4162 deletions

View file

@ -9,7 +9,6 @@ data:
- inference
- files
- safety
- telemetry
- tool_runtime
- vector_io
providers:
@ -67,12 +66,6 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -126,8 +126,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8323
telemetry:
enabled: true
vector_stores:
default_provider_id: chromadb
default_embedding_model:

View file

@ -12180,227 +12180,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
SpanEndPayload:
description: Payload for a span end event.
properties:
type:
const: span_end
default: span_end
title: Type
type: string
status:
$ref: '#/components/schemas/SpanStatus'
required:
- status
title: SpanEndPayload
type: object
SpanStartPayload:
description: Payload for a span start event.
properties:
type:
const: span_start
default: span_start
title: Type
type: string
name:
title: Name
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
required:
- name
title: SpanStartPayload
type: object
SpanStatus:
description: The status of a span indicating whether it completed successfully or with an error.
enum:
- ok
- error
title: SpanStatus
type: string
StructuredLogPayload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
LogSeverity:
description: The severity level of a log message.
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
type: string
MetricEvent:
description: A metric event containing a measured value.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: metric
default: metric
title: Type
type: string
metric:
title: Metric
type: string
value:
anyOf:
- type: integer
- type: number
title: integer | number
unit:
title: Unit
type: string
required:
- trace_id
- span_id
- timestamp
- metric
- value
- unit
title: MetricEvent
type: object
StructuredLogEvent:
description: A structured log event containing typed payload data.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: structured_log
default: structured_log
title: Type
type: string
payload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
required:
- trace_id
- span_id
- timestamp
- payload
title: StructuredLogEvent
type: object
UnstructuredLogEvent:
description: An unstructured log event containing a simple text message.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: unstructured_log
default: unstructured_log
title: Type
type: string
message:
title: Message
type: string
severity:
$ref: '#/components/schemas/LogSeverity'
required:
- trace_id
- span_id
- timestamp
- message
- severity
title: UnstructuredLogEvent
type: object
Event:
discriminator:
mapping:
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
propertyName: type
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
title: UnstructuredLogEvent
- $ref: '#/components/schemas/MetricEvent'
title: MetricEvent
- $ref: '#/components/schemas/StructuredLogEvent'
title: StructuredLogEvent
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@ -13225,236 +13004,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
Span:
description: A span representing a single operation within a trace.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
required:
- span_id
- trace_id
- name
- start_time
title: Span
type: object
Trace:
description: A trace representing the complete execution path of a request across multiple operations.
properties:
trace_id:
title: Trace Id
type: string
root_span_id:
title: Root Span Id
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
required:
- trace_id
- root_span_id
- start_time
title: Trace
type: object
EventType:
description: The type of telemetry event being logged.
enum:
- unstructured_log
- structured_log
- metric
title: EventType
type: string
StructuredLogType:
description: The type of structured log event payload.
enum:
- span_start
- span_end
title: StructuredLogType
type: string
EvalTrace:
description: A trace record for evaluation purposes.
properties:
session_id:
title: Session Id
type: string
step:
title: Step
type: string
input:
title: Input
type: string
output:
title: Output
type: string
expected_output:
title: Expected Output
type: string
required:
- session_id
- step
- input
- output
- expected_output
title: EvalTrace
type: object
SpanWithStatus:
description: A span that includes status information.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
status:
anyOf:
- $ref: '#/components/schemas/SpanStatus'
title: SpanStatus
- type: 'null'
nullable: true
title: SpanStatus
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
type: object
QueryConditionOp:
description: Comparison operators for query conditions.
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
type: string
QueryCondition:
description: A condition for filtering query results.
properties:
key:
title: Key
type: string
op:
$ref: '#/components/schemas/QueryConditionOp'
value:
title: Value
required:
- key
- op
- value
title: QueryCondition
type: object
MetricLabel:
description: A label associated with a metric.
properties:
name:
title: Name
type: string
value:
title: Value
type: string
required:
- name
- value
title: MetricLabel
type: object
MetricDataPoint:
description: A single data point in a metric time series.
properties:
timestamp:
title: Timestamp
type: integer
value:
title: Value
type: number
unit:
title: Unit
type: string
required:
- timestamp
- value
- unit
title: MetricDataPoint
type: object
MetricSeries:
description: A time series of metric data points.
properties:
metric:
title: Metric
type: string
labels:
items:
$ref: '#/components/schemas/MetricLabel'
title: Labels
type: array
values:
items:
$ref: '#/components/schemas/MetricDataPoint'
title: Values
type: array
required:
- metric
- labels
- values
title: MetricSeries
type: object
responses:
BadRequest400:
description: The request was invalid or malformed

View file

@ -10,203 +10,34 @@ import TabItem from '@theme/TabItem';
# Telemetry
The Llama Stack uses OpenTelemetry to provide comprehensive tracing, metrics, and logging capabilities.
The preferred way to instrument Llama Stack is with OpenTelemetry. Llama Stack enriches the data
collected by OpenTelemetry to capture helpful information about the performance and behavior of your
application. Here is an example of how to forward your telemetry to an OTLP collector from Llama Stack:
```sh
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:4318"
export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
export OTEL_SERVICE_NAME="llama-stack-server"
## Automatic Metrics Generation
uv pip install opentelemetry-distro opentelemetry-exporter-otlp
uv run opentelemetry-bootstrap -a requirements | uv pip install --requirement -
Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance.
### Available Metrics
The following metrics are automatically generated for each inference request:
| Metric Name | Type | Unit | Description | Labels |
|-------------|------|------|-------------|--------|
| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` |
| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` |
| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` |
### Metric Generation Flow
1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses
2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts
3. **Telemetry Logging**: Metrics are sent to the configured telemetry sinks
4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters
### Metric Aggregation Level
All metrics are generated and aggregated at the **inference request level**. This means:
- Each individual inference request generates its own set of metrics
- Metrics are not pre-aggregated across multiple requests
- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.)
- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping
### Example Metric Event
```python
MetricEvent(
trace_id="1234567890abcdef",
span_id="abcdef1234567890",
metric="total_tokens",
value=150,
timestamp=1703123456.789,
unit="tokens",
attributes={
"model_id": "meta-llama/Llama-3.2-3B-Instruct",
"provider_id": "tgi"
},
)
uv run opentelemetry-instrument llama stack run run.yaml
```
## Telemetry Sinks
Choose from multiple sink types based on your observability needs:
### Known issues
<Tabs>
<TabItem value="opentelemetry" label="OpenTelemetry">
Some database instrumentation libraries have a known bug where spans get wrapped twice, or do not get connected to a trace.
To prevent this, you can disable database specific tracing, and rely just on the SQLAlchemy tracing. If you are using
`sqlite3` as your database, for example, you can disable the additional tracing like this:
Send events to an OpenTelemetry Collector for integration with observability platforms:
**Use Cases:**
- Visualizing traces in tools like Jaeger
- Collecting metrics for Prometheus
- Integration with enterprise observability stacks
**Features:**
- Standard OpenTelemetry format
- Compatible with all OpenTelemetry collectors
- Supports both traces and metrics
</TabItem>
<TabItem value="console" label="Console">
Print events to the console for immediate debugging:
**Use Cases:**
- Development and testing
- Quick debugging sessions
- Simple logging without external tools
**Features:**
- Immediate output visibility
- No setup required
- Human-readable format
</TabItem>
</Tabs>
## Configuration
### Meta-Reference Provider
Currently, only the meta-reference provider is implemented. It can be configured to send events to multiple sink types:
```yaml
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "llama-stack-service"
sinks: ['console', 'otel_trace', 'otel_metric']
otel_exporter_otlp_endpoint: "http://localhost:4318"
```sh
export OTEL_PYTHON_DISABLED_INSTRUMENTATIONS="sqlite3"
```
### Environment Variables
Configure telemetry behavior using environment variables:
- **`OTEL_EXPORTER_OTLP_ENDPOINT`**: OpenTelemetry Collector endpoint (default: `http://localhost:4318`)
- **`OTEL_SERVICE_NAME`**: Service name for telemetry (default: empty string)
- **`TELEMETRY_SINKS`**: Comma-separated list of sinks (default: `[]`)
### Quick Setup: Complete Telemetry Stack
Use the automated setup script to launch the complete telemetry stack (Jaeger, OpenTelemetry Collector, Prometheus, and Grafana):
```bash
./scripts/telemetry/setup_telemetry.sh
```
This sets up:
- **Jaeger UI**: http://localhost:16686 (traces visualization)
- **Prometheus**: http://localhost:9090 (metrics)
- **Grafana**: http://localhost:3000 (dashboards with auto-configured data sources)
- **OTEL Collector**: http://localhost:4318 (OTLP endpoint)
Once running, you can visualize traces by navigating to [Grafana](http://localhost:3000/) and login with login `admin` and password `admin`.
## Querying Metrics
When using the OpenTelemetry sink, metrics are exposed in standard format and can be queried through various tools:
<Tabs>
<TabItem value="prometheus" label="Prometheus Queries">
Example Prometheus queries for analyzing token usage:
```promql
# Total tokens used across all models
sum(llama_stack_tokens_total)
# Tokens per model
sum by (model_id) (llama_stack_tokens_total)
# Average tokens per request over 5 minutes
rate(llama_stack_tokens_total[5m])
# Token usage by provider
sum by (provider_id) (llama_stack_tokens_total)
```
</TabItem>
<TabItem value="grafana" label="Grafana Dashboards">
Create dashboards using Prometheus as a data source:
- **Token Usage Over Time**: Line charts showing token consumption trends
- **Model Performance**: Comparison of different models by token efficiency
- **Provider Analysis**: Breakdown of usage across different providers
- **Request Patterns**: Understanding peak usage times and patterns
</TabItem>
<TabItem value="otlp" label="OpenTelemetry Collector">
Forward metrics to other observability systems:
- Export to multiple backends simultaneously
- Apply transformations and filtering
- Integrate with existing monitoring infrastructure
</TabItem>
</Tabs>
## Best Practices
### 🔍 **Monitoring Strategy**
- Use OpenTelemetry for production environments
- Set up alerts on key metrics like token usage and error rates
### 📊 **Metrics Analysis**
- Track token usage trends to optimize costs
- Monitor response times across different models
- Analyze usage patterns to improve resource allocation
### 🚨 **Alerting & Debugging**
- Set up alerts for unusual token consumption spikes
- Use trace data to debug performance issues
- Monitor error rates and failure patterns
### 🔧 **Configuration Management**
- Use environment variables for flexible deployment
- Ensure proper network access to OpenTelemetry collectors
## Related Resources
- **[Agents](./agent)** - Monitoring agent execution with telemetry
- **[Evaluations](./evals)** - Using telemetry data for performance evaluation
- **[Getting Started Notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb)** - Telemetry examples and queries
- **[OpenTelemetry Documentation](https://opentelemetry.io/)** - Comprehensive observability framework
- **[Jaeger Documentation](https://www.jaegertracing.io/)** - Distributed tracing visualization

View file

@ -17,7 +17,6 @@ A Llama Stack API is described as a collection of REST endpoints following OpenA
- **Eval**: generate outputs (via Inference or Agents) and perform scoring
- **VectorIO**: perform operations on vector stores, such as adding documents, searching, and deleting documents
- **Files**: manage file uploads, storage, and retrieval
- **Telemetry**: collect telemetry data from the system
- **Post Training**: fine-tune a model
- **Tool Runtime**: interact with various tools and protocols
- **Responses**: generate responses from an LLM

View file

@ -8,7 +8,6 @@ data:
- inference
- files
- safety
- telemetry
- tool_runtime
- vector_io
providers:
@ -73,12 +72,6 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
sinks: ${env.TELEMETRY_SINKS:=console}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search

View file

@ -140,8 +140,6 @@ server:
auth:
provider_config:
type: github_token
telemetry:
enabled: true
vector_stores:
default_provider_id: chromadb
default_embedding_model:

View file

@ -116,10 +116,6 @@ The following environment variables can be configured:
- `BRAVE_SEARCH_API_KEY`: Brave Search API key
- `TAVILY_SEARCH_API_KEY`: Tavily Search API key
### Telemetry Configuration
- `OTEL_SERVICE_NAME`: OpenTelemetry service name
- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry collector endpoint URL
## Enabling Providers
You can enable specific providers by setting appropriate environment variables. For example,
@ -265,7 +261,7 @@ The starter distribution uses SQLite for local storage of various components:
2. **Flexible Configuration**: Easy to enable/disable providers based on your needs
3. **No Local GPU Required**: Most providers are cloud-based, making it accessible to developers without high-end hardware
4. **Easy Migration**: Start with hosted providers and gradually move to local ones as needed
5. **Production Ready**: Includes safety, evaluation, and telemetry components
5. **Production Ready**: Includes safety and evaluation
6. **Tool Integration**: Comes with web search, RAG, and model context protocol tools
The starter distribution is ideal for developers who want to experiment with different AI providers, build prototypes quickly, or create applications that can work with multiple AI backends.

View file

@ -360,32 +360,6 @@ Methods:
- <code title="post /v1/synthetic-data-generation/generate">client.synthetic_data_generation.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/synthetic_data_generation.py">generate</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_generate_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/synthetic_data_generation_response.py">SyntheticDataGenerationResponse</a></code>
## Telemetry
Types:
```python
from llama_stack_client.types import (
QuerySpansResponse,
SpanWithStatus,
Trace,
TelemetryGetSpanResponse,
TelemetryGetSpanTreeResponse,
TelemetryQuerySpansResponse,
TelemetryQueryTracesResponse,
)
```
Methods:
- <code title="get /v1/telemetry/traces/{trace_id}/spans/{span_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span</a>(span_id, \*, trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_response.py">TelemetryGetSpanResponse</a></code>
- <code title="get /v1/telemetry/spans/{span_id}/tree">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_span_tree</a>(span_id, \*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_get_span_tree_response.py">TelemetryGetSpanTreeResponse</a></code>
- <code title="get /v1/telemetry/traces/{trace_id}">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">get_trace</a>(trace_id) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/trace.py">Trace</a></code>
- <code title="post /v1/telemetry/events">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">log_event</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_log_event_params.py">params</a>) -> None</code>
- <code title="get /v1/telemetry/spans">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_spans</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_spans_response.py">TelemetryQuerySpansResponse</a></code>
- <code title="get /v1/telemetry/traces">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">query_traces</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_query_traces_response.py">TelemetryQueryTracesResponse</a></code>
- <code title="post /v1/telemetry/spans/export">client.telemetry.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/telemetry.py">save_spans_to_dataset</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py">params</a>) -> None</code>
## Datasetio
Types:

View file

@ -13,7 +13,7 @@ function HomepageHeader() {
<div className={styles.heroContent}>
<h1 className={styles.heroTitle}>Build AI Applications with Llama Stack</h1>
<p className={styles.heroSubtitle}>
Unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry
Unified APIs for Inference, RAG, Agents, Tools, and Safety
</p>
<div className={styles.buttons}>
<Link
@ -206,7 +206,7 @@ export default function Home() {
return (
<Layout
title="Build AI Applications"
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Telemetry.">
description="The open-source framework for building generative AI applications with unified APIs for Inference, RAG, Agents, Tools, Safety, and Evals.">
<HomepageHeader />
<main>
<QuickStart />

View file

@ -9023,227 +9023,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
SpanEndPayload:
description: Payload for a span end event.
properties:
type:
const: span_end
default: span_end
title: Type
type: string
status:
$ref: '#/components/schemas/SpanStatus'
required:
- status
title: SpanEndPayload
type: object
SpanStartPayload:
description: Payload for a span start event.
properties:
type:
const: span_start
default: span_start
title: Type
type: string
name:
title: Name
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
required:
- name
title: SpanStartPayload
type: object
SpanStatus:
description: The status of a span indicating whether it completed successfully or with an error.
enum:
- ok
- error
title: SpanStatus
type: string
StructuredLogPayload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
LogSeverity:
description: The severity level of a log message.
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
type: string
MetricEvent:
description: A metric event containing a measured value.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: metric
default: metric
title: Type
type: string
metric:
title: Metric
type: string
value:
anyOf:
- type: integer
- type: number
title: integer | number
unit:
title: Unit
type: string
required:
- trace_id
- span_id
- timestamp
- metric
- value
- unit
title: MetricEvent
type: object
StructuredLogEvent:
description: A structured log event containing typed payload data.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: structured_log
default: structured_log
title: Type
type: string
payload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
required:
- trace_id
- span_id
- timestamp
- payload
title: StructuredLogEvent
type: object
UnstructuredLogEvent:
description: An unstructured log event containing a simple text message.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: unstructured_log
default: unstructured_log
title: Type
type: string
message:
title: Message
type: string
severity:
$ref: '#/components/schemas/LogSeverity'
required:
- trace_id
- span_id
- timestamp
- message
- severity
title: UnstructuredLogEvent
type: object
Event:
discriminator:
mapping:
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
propertyName: type
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
title: UnstructuredLogEvent
- $ref: '#/components/schemas/MetricEvent'
title: MetricEvent
- $ref: '#/components/schemas/StructuredLogEvent'
title: StructuredLogEvent
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@ -10068,236 +9847,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
Span:
description: A span representing a single operation within a trace.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
required:
- span_id
- trace_id
- name
- start_time
title: Span
type: object
Trace:
description: A trace representing the complete execution path of a request across multiple operations.
properties:
trace_id:
title: Trace Id
type: string
root_span_id:
title: Root Span Id
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
required:
- trace_id
- root_span_id
- start_time
title: Trace
type: object
EventType:
description: The type of telemetry event being logged.
enum:
- unstructured_log
- structured_log
- metric
title: EventType
type: string
StructuredLogType:
description: The type of structured log event payload.
enum:
- span_start
- span_end
title: StructuredLogType
type: string
EvalTrace:
description: A trace record for evaluation purposes.
properties:
session_id:
title: Session Id
type: string
step:
title: Step
type: string
input:
title: Input
type: string
output:
title: Output
type: string
expected_output:
title: Expected Output
type: string
required:
- session_id
- step
- input
- output
- expected_output
title: EvalTrace
type: object
SpanWithStatus:
description: A span that includes status information.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
status:
anyOf:
- $ref: '#/components/schemas/SpanStatus'
title: SpanStatus
- type: 'null'
nullable: true
title: SpanStatus
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
type: object
QueryConditionOp:
description: Comparison operators for query conditions.
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
type: string
QueryCondition:
description: A condition for filtering query results.
properties:
key:
title: Key
type: string
op:
$ref: '#/components/schemas/QueryConditionOp'
value:
title: Value
required:
- key
- op
- value
title: QueryCondition
type: object
MetricLabel:
description: A label associated with a metric.
properties:
name:
title: Name
type: string
value:
title: Value
type: string
required:
- name
- value
title: MetricLabel
type: object
MetricDataPoint:
description: A single data point in a metric time series.
properties:
timestamp:
title: Timestamp
type: integer
value:
title: Value
type: number
unit:
title: Unit
type: string
required:
- timestamp
- value
- unit
title: MetricDataPoint
type: object
MetricSeries:
description: A time series of metric data points.
properties:
metric:
title: Metric
type: string
labels:
items:
$ref: '#/components/schemas/MetricLabel'
title: Labels
type: array
values:
items:
$ref: '#/components/schemas/MetricDataPoint'
title: Values
type: array
required:
- metric
- labels
- values
title: MetricSeries
type: object
responses:
BadRequest400:
description: The request was invalid or malformed

View file

@ -7952,227 +7952,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
SpanEndPayload:
description: Payload for a span end event.
properties:
type:
const: span_end
default: span_end
title: Type
type: string
status:
$ref: '#/components/schemas/SpanStatus'
required:
- status
title: SpanEndPayload
type: object
SpanStartPayload:
description: Payload for a span start event.
properties:
type:
const: span_start
default: span_start
title: Type
type: string
name:
title: Name
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
required:
- name
title: SpanStartPayload
type: object
SpanStatus:
description: The status of a span indicating whether it completed successfully or with an error.
enum:
- ok
- error
title: SpanStatus
type: string
StructuredLogPayload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
LogSeverity:
description: The severity level of a log message.
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
type: string
MetricEvent:
description: A metric event containing a measured value.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: metric
default: metric
title: Type
type: string
metric:
title: Metric
type: string
value:
anyOf:
- type: integer
- type: number
title: integer | number
unit:
title: Unit
type: string
required:
- trace_id
- span_id
- timestamp
- metric
- value
- unit
title: MetricEvent
type: object
StructuredLogEvent:
description: A structured log event containing typed payload data.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: structured_log
default: structured_log
title: Type
type: string
payload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
required:
- trace_id
- span_id
- timestamp
- payload
title: StructuredLogEvent
type: object
UnstructuredLogEvent:
description: An unstructured log event containing a simple text message.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: unstructured_log
default: unstructured_log
title: Type
type: string
message:
title: Message
type: string
severity:
$ref: '#/components/schemas/LogSeverity'
required:
- trace_id
- span_id
- timestamp
- message
- severity
title: UnstructuredLogEvent
type: object
Event:
discriminator:
mapping:
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
propertyName: type
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
title: UnstructuredLogEvent
- $ref: '#/components/schemas/MetricEvent'
title: MetricEvent
- $ref: '#/components/schemas/StructuredLogEvent'
title: StructuredLogEvent
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@ -8997,236 +8776,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
Span:
description: A span representing a single operation within a trace.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
required:
- span_id
- trace_id
- name
- start_time
title: Span
type: object
Trace:
description: A trace representing the complete execution path of a request across multiple operations.
properties:
trace_id:
title: Trace Id
type: string
root_span_id:
title: Root Span Id
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
required:
- trace_id
- root_span_id
- start_time
title: Trace
type: object
EventType:
description: The type of telemetry event being logged.
enum:
- unstructured_log
- structured_log
- metric
title: EventType
type: string
StructuredLogType:
description: The type of structured log event payload.
enum:
- span_start
- span_end
title: StructuredLogType
type: string
EvalTrace:
description: A trace record for evaluation purposes.
properties:
session_id:
title: Session Id
type: string
step:
title: Step
type: string
input:
title: Input
type: string
output:
title: Output
type: string
expected_output:
title: Expected Output
type: string
required:
- session_id
- step
- input
- output
- expected_output
title: EvalTrace
type: object
SpanWithStatus:
description: A span that includes status information.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
status:
anyOf:
- $ref: '#/components/schemas/SpanStatus'
title: SpanStatus
- type: 'null'
nullable: true
title: SpanStatus
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
type: object
QueryConditionOp:
description: Comparison operators for query conditions.
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
type: string
QueryCondition:
description: A condition for filtering query results.
properties:
key:
title: Key
type: string
op:
$ref: '#/components/schemas/QueryConditionOp'
value:
title: Value
required:
- key
- op
- value
title: QueryCondition
type: object
MetricLabel:
description: A label associated with a metric.
properties:
name:
title: Name
type: string
value:
title: Value
type: string
required:
- name
- value
title: MetricLabel
type: object
MetricDataPoint:
description: A single data point in a metric time series.
properties:
timestamp:
title: Timestamp
type: integer
value:
title: Value
type: number
unit:
title: Unit
type: string
required:
- timestamp
- value
- unit
title: MetricDataPoint
type: object
MetricSeries:
description: A time series of metric data points.
properties:
metric:
title: Metric
type: string
labels:
items:
$ref: '#/components/schemas/MetricLabel'
title: Labels
type: array
values:
items:
$ref: '#/components/schemas/MetricDataPoint'
title: Values
type: array
required:
- metric
- labels
- values
title: MetricSeries
type: object
responses:
BadRequest400:
description: The request was invalid or malformed

View file

@ -10850,227 +10850,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
SpanEndPayload:
description: Payload for a span end event.
properties:
type:
const: span_end
default: span_end
title: Type
type: string
status:
$ref: '#/components/schemas/SpanStatus'
required:
- status
title: SpanEndPayload
type: object
SpanStartPayload:
description: Payload for a span start event.
properties:
type:
const: span_start
default: span_start
title: Type
type: string
name:
title: Name
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
required:
- name
title: SpanStartPayload
type: object
SpanStatus:
description: The status of a span indicating whether it completed successfully or with an error.
enum:
- ok
- error
title: SpanStatus
type: string
StructuredLogPayload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
LogSeverity:
description: The severity level of a log message.
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
type: string
MetricEvent:
description: A metric event containing a measured value.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: metric
default: metric
title: Type
type: string
metric:
title: Metric
type: string
value:
anyOf:
- type: integer
- type: number
title: integer | number
unit:
title: Unit
type: string
required:
- trace_id
- span_id
- timestamp
- metric
- value
- unit
title: MetricEvent
type: object
StructuredLogEvent:
description: A structured log event containing typed payload data.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: structured_log
default: structured_log
title: Type
type: string
payload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
required:
- trace_id
- span_id
- timestamp
- payload
title: StructuredLogEvent
type: object
UnstructuredLogEvent:
description: An unstructured log event containing a simple text message.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: unstructured_log
default: unstructured_log
title: Type
type: string
message:
title: Message
type: string
severity:
$ref: '#/components/schemas/LogSeverity'
required:
- trace_id
- span_id
- timestamp
- message
- severity
title: UnstructuredLogEvent
type: object
Event:
discriminator:
mapping:
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
propertyName: type
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
title: UnstructuredLogEvent
- $ref: '#/components/schemas/MetricEvent'
title: MetricEvent
- $ref: '#/components/schemas/StructuredLogEvent'
title: StructuredLogEvent
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@ -11892,236 +11671,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
Span:
description: A span representing a single operation within a trace.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
required:
- span_id
- trace_id
- name
- start_time
title: Span
type: object
Trace:
description: A trace representing the complete execution path of a request across multiple operations.
properties:
trace_id:
title: Trace Id
type: string
root_span_id:
title: Root Span Id
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
required:
- trace_id
- root_span_id
- start_time
title: Trace
type: object
EventType:
description: The type of telemetry event being logged.
enum:
- unstructured_log
- structured_log
- metric
title: EventType
type: string
StructuredLogType:
description: The type of structured log event payload.
enum:
- span_start
- span_end
title: StructuredLogType
type: string
EvalTrace:
description: A trace record for evaluation purposes.
properties:
session_id:
title: Session Id
type: string
step:
title: Step
type: string
input:
title: Input
type: string
output:
title: Output
type: string
expected_output:
title: Expected Output
type: string
required:
- session_id
- step
- input
- output
- expected_output
title: EvalTrace
type: object
SpanWithStatus:
description: A span that includes status information.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
status:
anyOf:
- $ref: '#/components/schemas/SpanStatus'
title: SpanStatus
- type: 'null'
nullable: true
title: SpanStatus
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
type: object
QueryConditionOp:
description: Comparison operators for query conditions.
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
type: string
QueryCondition:
description: A condition for filtering query results.
properties:
key:
title: Key
type: string
op:
$ref: '#/components/schemas/QueryConditionOp'
value:
title: Value
required:
- key
- op
- value
title: QueryCondition
type: object
MetricLabel:
description: A label associated with a metric.
properties:
name:
title: Name
type: string
value:
title: Value
type: string
required:
- name
- value
title: MetricLabel
type: object
MetricDataPoint:
description: A single data point in a metric time series.
properties:
timestamp:
title: Timestamp
type: integer
value:
title: Value
type: number
unit:
title: Unit
type: string
required:
- timestamp
- value
- unit
title: MetricDataPoint
type: object
MetricSeries:
description: A time series of metric data points.
properties:
metric:
title: Metric
type: string
labels:
items:
$ref: '#/components/schemas/MetricLabel'
title: Labels
type: array
values:
items:
$ref: '#/components/schemas/MetricDataPoint'
title: Values
type: array
required:
- metric
- labels
- values
title: MetricSeries
type: object
responses:
BadRequest400:
description: The request was invalid or malformed


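The schemas above (and their duplicates in the second spec file below) describe the custom telemetry event model this PR deletes. For reference, a minimal instance of the removed `Event` union as the schema defines it, written as a Python dict; all field values are illustrative:

```python
# A MetricEvent, discriminated by type="metric" per the removed Event schema.
# Values below are made up for illustration only.
metric_event = {
    "type": "metric",
    "trace_id": "0123456789abcdef0123456789abcdef",
    "span_id": "0123456789abcdef",
    "timestamp": "2025-01-01T00:00:00Z",  # date-time string per the schema
    "metric": "completion_tokens",
    "value": 42,
    "unit": "tokens",
    "attributes": {"model_id": "openai/gpt-4o-mini", "provider_id": "openai"},
}
```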
@@ -12180,227 +12180,6 @@ components:
- $ref: '#/components/schemas/OpenAIResponseContentPartReasoningText'
title: OpenAIResponseContentPartReasoningText
title: OpenAIResponseContentPartOutputText | OpenAIResponseContentPartRefusal | OpenAIResponseContentPartReasoningText
SpanEndPayload:
description: Payload for a span end event.
properties:
type:
const: span_end
default: span_end
title: Type
type: string
status:
$ref: '#/components/schemas/SpanStatus'
required:
- status
title: SpanEndPayload
type: object
SpanStartPayload:
description: Payload for a span start event.
properties:
type:
const: span_start
default: span_start
title: Type
type: string
name:
title: Name
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
required:
- name
title: SpanStartPayload
type: object
SpanStatus:
description: The status of a span indicating whether it completed successfully or with an error.
enum:
- ok
- error
title: SpanStatus
type: string
StructuredLogPayload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
LogSeverity:
description: The severity level of a log message.
enum:
- verbose
- debug
- info
- warn
- error
- critical
title: LogSeverity
type: string
MetricEvent:
description: A metric event containing a measured value.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: metric
default: metric
title: Type
type: string
metric:
title: Metric
type: string
value:
anyOf:
- type: integer
- type: number
title: integer | number
unit:
title: Unit
type: string
required:
- trace_id
- span_id
- timestamp
- metric
- value
- unit
title: MetricEvent
type: object
StructuredLogEvent:
description: A structured log event containing typed payload data.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: structured_log
default: structured_log
title: Type
type: string
payload:
discriminator:
mapping:
span_end: '#/components/schemas/SpanEndPayload'
span_start: '#/components/schemas/SpanStartPayload'
propertyName: type
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
title: SpanStartPayload
- $ref: '#/components/schemas/SpanEndPayload'
title: SpanEndPayload
title: SpanStartPayload | SpanEndPayload
required:
- trace_id
- span_id
- timestamp
- payload
title: StructuredLogEvent
type: object
UnstructuredLogEvent:
description: An unstructured log event containing a simple text message.
properties:
trace_id:
title: Trace Id
type: string
span_id:
title: Span Id
type: string
timestamp:
format: date-time
title: Timestamp
type: string
attributes:
anyOf:
- additionalProperties:
anyOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
title: string | ... (4 variants)
type: object
- type: 'null'
type:
const: unstructured_log
default: unstructured_log
title: Type
type: string
message:
title: Message
type: string
severity:
$ref: '#/components/schemas/LogSeverity'
required:
- trace_id
- span_id
- timestamp
- message
- severity
title: UnstructuredLogEvent
type: object
Event:
discriminator:
mapping:
metric: '#/components/schemas/MetricEvent'
structured_log: '#/components/schemas/StructuredLogEvent'
unstructured_log: '#/components/schemas/UnstructuredLogEvent'
propertyName: type
oneOf:
- $ref: '#/components/schemas/UnstructuredLogEvent'
title: UnstructuredLogEvent
- $ref: '#/components/schemas/MetricEvent'
title: MetricEvent
- $ref: '#/components/schemas/StructuredLogEvent'
title: StructuredLogEvent
title: UnstructuredLogEvent | MetricEvent | StructuredLogEvent
MetricInResponse:
description: A metric value included in API responses.
properties:
@@ -13225,236 +13004,6 @@ components:
- logger_config
title: PostTrainingRLHFRequest
type: object
Span:
description: A span representing a single operation within a trace.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
required:
- span_id
- trace_id
- name
- start_time
title: Span
type: object
Trace:
description: A trace representing the complete execution path of a request across multiple operations.
properties:
trace_id:
title: Trace Id
type: string
root_span_id:
title: Root Span Id
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
required:
- trace_id
- root_span_id
- start_time
title: Trace
type: object
EventType:
description: The type of telemetry event being logged.
enum:
- unstructured_log
- structured_log
- metric
title: EventType
type: string
StructuredLogType:
description: The type of structured log event payload.
enum:
- span_start
- span_end
title: StructuredLogType
type: string
EvalTrace:
description: A trace record for evaluation purposes.
properties:
session_id:
title: Session Id
type: string
step:
title: Step
type: string
input:
title: Input
type: string
output:
title: Output
type: string
expected_output:
title: Expected Output
type: string
required:
- session_id
- step
- input
- output
- expected_output
title: EvalTrace
type: object
SpanWithStatus:
description: A span that includes status information.
properties:
span_id:
title: Span Id
type: string
trace_id:
title: Trace Id
type: string
parent_span_id:
anyOf:
- type: string
- type: 'null'
nullable: true
name:
title: Name
type: string
start_time:
format: date-time
title: Start Time
type: string
end_time:
anyOf:
- format: date-time
type: string
- type: 'null'
nullable: true
attributes:
anyOf:
- additionalProperties: true
type: object
- type: 'null'
status:
anyOf:
- $ref: '#/components/schemas/SpanStatus'
title: SpanStatus
- type: 'null'
nullable: true
title: SpanStatus
required:
- span_id
- trace_id
- name
- start_time
title: SpanWithStatus
type: object
QueryConditionOp:
description: Comparison operators for query conditions.
enum:
- eq
- ne
- gt
- lt
title: QueryConditionOp
type: string
QueryCondition:
description: A condition for filtering query results.
properties:
key:
title: Key
type: string
op:
$ref: '#/components/schemas/QueryConditionOp'
value:
title: Value
required:
- key
- op
- value
title: QueryCondition
type: object
MetricLabel:
description: A label associated with a metric.
properties:
name:
title: Name
type: string
value:
title: Value
type: string
required:
- name
- value
title: MetricLabel
type: object
MetricDataPoint:
description: A single data point in a metric time series.
properties:
timestamp:
title: Timestamp
type: integer
value:
title: Value
type: number
unit:
title: Unit
type: string
required:
- timestamp
- value
- unit
title: MetricDataPoint
type: object
MetricSeries:
description: A time series of metric data points.
properties:
metric:
title: Metric
type: string
labels:
items:
$ref: '#/components/schemas/MetricLabel'
title: Labels
type: array
values:
items:
$ref: '#/components/schemas/MetricDataPoint'
title: Values
type: array
required:
- metric
- labels
- values
title: MetricSeries
type: object
responses:
BadRequest400:
description: The request was invalid or malformed


@@ -171,10 +171,18 @@ if [[ "$COLLECT_ONLY" == false ]]; then
# Set MCP host for in-process MCP server tests
# - For library client and server mode: localhost (both on same host)
# - For docker mode: host.docker.internal (container needs to reach host)
# - For docker mode on Linux: localhost (container uses host network, shares network namespace)
# - For docker mode on macOS/Windows: host.docker.internal (container uses bridge network)
if [[ "$STACK_CONFIG" == docker:* ]]; then
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
# On Linux with host network mode, container shares host network namespace
export LLAMA_STACK_TEST_MCP_HOST="localhost"
echo "Setting MCP host: localhost (docker mode with host network)"
else
# On macOS/Windows with bridge network, need special host access
export LLAMA_STACK_TEST_MCP_HOST="host.docker.internal"
echo "Setting MCP host: host.docker.internal (docker mode)"
echo "Setting MCP host: host.docker.internal (docker mode with bridge network)"
fi
else
export LLAMA_STACK_TEST_MCP_HOST="localhost"
echo "Setting MCP host: localhost (library/server mode)"


@@ -8,7 +8,6 @@
Schema discovery and collection for OpenAPI generation.
"""
import importlib
from typing import Any
@@ -20,23 +19,6 @@ def _ensure_components_schemas(openapi_schema: dict[str, Any]) -> None:
openapi_schema["components"]["schemas"] = {}
def _load_extra_schema_modules() -> None:
"""
Import modules outside llama_stack_api that use schema_utils to register schemas.
The API package already imports its submodules via __init__, but server-side modules
like telemetry need to be imported explicitly so their decorator side effects run.
"""
extra_modules = [
"llama_stack.core.telemetry.telemetry",
]
for module_name in extra_modules:
try:
importlib.import_module(module_name)
except ImportError:
continue
def _extract_and_fix_defs(schema: dict[str, Any], openapi_schema: dict[str, Any]) -> None:
"""
Extract $defs from a schema, move them to components/schemas, and fix references.
@@ -79,9 +61,6 @@ def _ensure_json_schema_types_included(openapi_schema: dict[str, Any]) -> dict[s
iter_registered_schema_types,
)
# Import extra modules (e.g., telemetry) whose schema registrations live outside llama_stack_api
_load_extra_schema_modules()
# Handle explicitly registered schemas first (union types, Annotated structs, etc.)
for registration_info in iter_registered_schema_types():
schema_type = registration_info.type


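`_load_extra_schema_modules` existed only because schema registration happens as an import-time side effect; with the telemetry module gone, nothing outside `llama_stack_api` needs that treatment. A minimal sketch of the pattern, with a simplified stand-in registry (the real `register_schema`/`json_schema_type` live in `llama_stack_api`):

```python
# Simplified stand-in for the registration mechanism; illustrative only.
_REGISTRY: dict[str, type] = {}

def register_schema(cls: type) -> type:
    _REGISTRY[cls.__name__] = cls  # side effect runs at import time
    return cls

@register_schema
class ExampleEvent:  # hypothetical schema class
    trace_id: str

# Nothing is registered until the defining module is imported -- which is why
# the generator had to import telemetry modules explicitly before collecting.
```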
@@ -1,11 +1,24 @@
{
"annotations": {
"list": []
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"id": 1,
"links": [],
"liveNow": false,
"panels": [
@@ -16,11 +29,40 @@
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"fillOpacity": 10
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
@@ -32,7 +74,8 @@
}
]
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -40,15 +83,16 @@
"x": 0,
"y": 0
},
"id": 1,
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -59,9 +103,112 @@
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_completion_tokens_total",
"legendFormat": "{{model_id}} ({{provider_id}})",
"refId": "A"
"disableTextWrap": false,
"editorMode": "builder",
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"input\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Prompt Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"maxHeight": 600,
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"disableTextWrap": false,
"editorMode": "builder",
"exemplar": false,
"expr": "sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type=\"output\"})",
"fullMetaSearch": false,
"includeNullMetadata": true,
"interval": "",
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Completion Tokens",
@@ -74,78 +221,40 @@
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"fillOpacity": 10
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"thresholdsStyle": {
"mode": "off"
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_prompt_tokens_total",
"legendFormat": "Prompt - {{model_id}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_tokens_total",
"legendFormat": "Total - {{model_id}}",
"refId": "B"
}
],
"title": "Prompt & Total Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
@@ -158,7 +267,8 @@
]
},
"unit": "ms"
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -175,6 +285,7 @@
"showLegend": true
},
"tooltip": {
"maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -219,7 +330,8 @@
}
]
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -240,8 +352,11 @@
"fields": "",
"values": false
},
"textMode": "auto"
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.0.0",
"targets": [
{
"datasource": {
@@ -272,7 +387,8 @@
}
]
}
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -293,8 +409,11 @@
"fields": "",
"values": false
},
"textMode": "auto"
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "11.0.0",
"targets": [
{
"datasource": {
@@ -315,11 +434,40 @@
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"fillOpacity": 10
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
@@ -332,7 +480,8 @@
]
},
"unit": "reqps"
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -349,6 +498,7 @@
"showLegend": true
},
"tooltip": {
"maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -374,11 +524,40 @@
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"fillOpacity": 10
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
@@ -391,7 +570,8 @@
]
},
"unit": "Bps"
}
},
"overrides": []
},
"gridPos": {
"h": 8,
@@ -408,6 +588,7 @@
"showLegend": true
},
"tooltip": {
"maxHeight": 600,
"mode": "multi",
"sort": "none"
}
@@ -437,7 +618,7 @@
}
],
"refresh": "5s",
"schemaVersion": 38,
"schemaVersion": 39,
"tags": [
"llama-stack"
],
@@ -445,13 +626,14 @@
"list": []
},
"time": {
"from": "now-15m",
"from": "now-3h",
"to": "now"
},
"timeRangeUpdatedDuringEditOrView": false,
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
"version": 0,
"version": 17,
"weekStart": ""
}


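The dashboard panels now read the semconv-derived metric (`llama_stack_gen_ai_client_token_usage_sum`, split by `gen_ai_token_type`) instead of the old `llama_stack_prompt_tokens_total`/`llama_stack_completion_tokens_total` counters. A quick sketch for checking the new series outside Grafana, assuming the Prometheus instance from `/scripts/telemetry` is on its default port 9090:

```python
# Query the prompt-token series directly from Prometheus's HTTP API.
import requests

PROMQL = 'sum by(gen_ai_request_model) (llama_stack_gen_ai_client_token_usage_sum{gen_ai_token_type="input"})'

resp = requests.get("http://localhost:9090/api/v1/query", params={"query": PROMQL})
resp.raise_for_status()
for series in resp.json()["data"]["result"]:
    model = series["metric"].get("gen_ai_request_model", "<unknown>")
    print(model, series["value"][1])  # value is [timestamp, value-string]
```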
@@ -191,22 +191,6 @@ class DistributionSpec(BaseModel):
)
class TelemetryConfig(BaseModel):
"""
Configuration for telemetry.
Llama Stack uses OpenTelemetry for telemetry. Please refer to https://opentelemetry.io/docs/languages/sdk-configuration/
for env variables to configure the OpenTelemetry SDK.
Example:
```bash
OTEL_SERVICE_NAME=llama-stack OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 uv run llama stack run starter
```
"""
enabled: bool = Field(default=False, description="enable or disable telemetry")
class OAuth2JWKSConfig(BaseModel):
# The JWKS URI for collecting public keys
uri: str
@@ -527,8 +511,6 @@ can be instantiated multiple times (with different configs) if necessary.
logging: LoggingConfig | None = Field(default=None, description="Configuration for Llama Stack Logging")
telemetry: TelemetryConfig = Field(default_factory=TelemetryConfig, description="Configuration for telemetry")
server: ServerConfig = Field(
default_factory=ServerConfig,
description="Configuration for the HTTP(S) server",


@@ -46,8 +46,6 @@ from llama_stack.core.request_headers import PROVIDER_DATA_VAR, request_provider
from llama_stack.core.resolver import ProviderRegistry
from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
from llama_stack.core.stack import Stack, get_stack_run_config_from_distro, replace_env_vars
from llama_stack.core.telemetry import Telemetry
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, end_trace, setup_logger, start_trace
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.context import preserve_contexts_async_generator
from llama_stack.core.utils.exec import in_notebook
@@ -204,13 +202,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
super().__init__()
# Initialize logging from environment variables first
setup_logging()
# when using the library client, we should not log to console since many
# of our logs are intended for server-side usage
if sinks_from_env := os.environ.get("TELEMETRY_SINKS", None):
current_sinks = sinks_from_env.strip().lower().split(",")
os.environ["TELEMETRY_SINKS"] = ",".join(sink for sink in current_sinks if sink != "console")
if in_notebook():
import nest_asyncio
@@ -295,8 +286,6 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
raise _e
assert self.impls is not None
if self.config.telemetry.enabled:
setup_logger(Telemetry())
if not os.environ.get("PYTEST_CURRENT_TEST"):
console = Console()
@@ -392,13 +381,7 @@
body, field_names = self._handle_file_uploads(options, body)
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
trace_path = webmethod.descriptive_name or route_path
await start_trace(trace_path, {"__location__": "library_client"})
try:
result = await matched_func(**body)
finally:
await end_trace()
# Handle FastAPI Response objects (e.g., from file content retrieval)
if isinstance(result, FastAPIResponse):
@@ -457,19 +440,13 @@
# Prepare body for the function call (handles both Pydantic and traditional params)
body = self._convert_body(func, body)
trace_path = webmethod.descriptive_name or route_path
await start_trace(trace_path, {"__location__": "library_client"})
async def gen():
try:
async for chunk in await func(**body):
data = json.dumps(convert_pydantic_to_json_value(chunk))
sse_event = f"data: {data}\n\n"
yield sse_event.encode("utf-8")
finally:
await end_trace()
wrapped_gen = preserve_contexts_async_generator(gen(), [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR])
wrapped_gen = preserve_contexts_async_generator(gen(), [PROVIDER_DATA_VAR])
mock_response = httpx.Response(
status_code=httpx.codes.OK,


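With `CURRENT_TRACE_CONTEXT` and the manual `start_trace`/`end_trace` calls removed, the active span now travels on OpenTelemetry's own context, so streaming generators only need to preserve `PROVIDER_DATA_VAR`. A sketch (not code from this PR) of reading the current span inside such a generator:

```python
# OTel context replaces the custom trace contextvar: any code on the request
# path can ask for the active span directly.
from opentelemetry import trace

async def annotate_chunks(source):
    ctx = trace.get_current_span().get_span_context()
    async for chunk in source:
        # e.g., correlate streamed chunks with the trace in logs
        yield {"trace_id": format(ctx.trace_id, "032x"), "chunk": chunk}
```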
@@ -392,8 +392,6 @@ async def instantiate_provider(
args = [config, deps]
if "policy" in inspect.signature(getattr(module, method)).parameters:
args.append(policy)
if "telemetry_enabled" in inspect.signature(getattr(module, method)).parameters and run_config.telemetry:
args.append(run_config.telemetry.enabled)
fn = getattr(module, method)
impl = await fn(*args)
@@ -401,18 +399,6 @@
impl.__provider_spec__ = provider_spec
impl.__provider_config__ = config
# Apply tracing if telemetry is enabled and any base class has __marked_for_tracing__ marker
if run_config.telemetry.enabled:
traced_classes = [
base for base in reversed(impl.__class__.__mro__) if getattr(base, "__marked_for_tracing__", False)
]
if traced_classes:
from llama_stack.core.telemetry.trace_protocol import trace_protocol
for cls in traced_classes:
trace_protocol(cls)
protocols = api_protocol_map_for_compliance_check(run_config)
additional_protocols = additional_protocols_map()
# TODO: check compliance for special tool groups


@@ -85,8 +85,6 @@ async def get_auto_router_impl(
)
await inference_store.initialize()
api_to_dep_impl["store"] = inference_store
api_to_dep_impl["telemetry_enabled"] = run_config.telemetry.enabled
elif api == Api.vector_io:
api_to_dep_impl["vector_stores_config"] = run_config.vector_stores
elif api == Api.safety:


@@ -7,7 +7,6 @@
import asyncio
import time
from collections.abc import AsyncIterator
from datetime import UTC, datetime
from typing import Annotated, Any
from fastapi import Body
@@ -15,11 +14,7 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
from pydantic import TypeAdapter
from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.utils.inference.inference_store import InferenceStore
from llama_stack_api import (
HealthResponse,
@@ -60,15 +55,10 @@ class InferenceRouter(Inference):
self,
routing_table: RoutingTable,
store: InferenceStore | None = None,
telemetry_enabled: bool = False,
) -> None:
logger.debug("Initializing InferenceRouter")
self.routing_table = routing_table
self.telemetry_enabled = telemetry_enabled
self.store = store
if self.telemetry_enabled:
self.tokenizer = Tokenizer.get_instance()
self.formatter = ChatFormat(self.tokenizer)
async def initialize(self) -> None:
logger.debug("InferenceRouter.initialize")
@@ -94,54 +84,6 @@
)
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
def _construct_metrics(
self,
prompt_tokens: int,
completion_tokens: int,
total_tokens: int,
fully_qualified_model_id: str,
provider_id: str,
) -> list[MetricEvent]:
"""Constructs a list of MetricEvent objects containing token usage metrics.
Args:
prompt_tokens: Number of tokens in the prompt
completion_tokens: Number of tokens in the completion
total_tokens: Total number of tokens used
fully_qualified_model_id:
provider_id: The provider identifier
Returns:
List of MetricEvent objects with token usage metrics
"""
span = get_current_span()
if span is None:
logger.warning("No span found for token usage metrics")
return []
metrics = [
("prompt_tokens", prompt_tokens),
("completion_tokens", completion_tokens),
("total_tokens", total_tokens),
]
metric_events = []
for metric_name, value in metrics:
metric_events.append(
MetricEvent(
trace_id=span.trace_id,
span_id=span.span_id,
metric=metric_name,
value=value,
timestamp=datetime.now(UTC),
unit="tokens",
attributes={
"model_id": fully_qualified_model_id,
"provider_id": provider_id,
},
)
)
return metric_events
async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
model = await self.routing_table.get_object_by_identifier("model", model_id)
if model:
@@ -186,26 +128,9 @@
if params.stream:
return await provider.openai_completion(params)
# TODO: Metrics do NOT work with openai_completion stream=True due to the fact
# that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
response = await provider.openai_completion(params)
response.model = request_model_id
if self.telemetry_enabled and response.usage is not None:
metrics = self._construct_metrics(
prompt_tokens=response.usage.prompt_tokens,
completion_tokens=response.usage.completion_tokens,
total_tokens=response.usage.total_tokens,
fully_qualified_model_id=request_model_id,
provider_id=provider.__provider_id__,
)
for metric in metrics:
enqueue_event(metric)
# these metrics will show up in the client response.
response.metrics = (
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
)
return response
async def openai_chat_completion(
@@ -254,20 +179,6 @@
if self.store:
asyncio.create_task(self.store.store_chat_completion(response, params.messages))
if self.telemetry_enabled and response.usage is not None:
metrics = self._construct_metrics(
prompt_tokens=response.usage.prompt_tokens,
completion_tokens=response.usage.completion_tokens,
total_tokens=response.usage.total_tokens,
fully_qualified_model_id=request_model_id,
provider_id=provider.__provider_id__,
)
for metric in metrics:
enqueue_event(metric)
# these metrics will show up in the client response.
response.metrics = (
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
)
return response
async def openai_embeddings(
@@ -411,18 +322,6 @@
for choice_data in choices_data.values():
completion_text += "".join(choice_data["content_parts"])
# Add metrics to the chunk
if self.telemetry_enabled and hasattr(chunk, "usage") and chunk.usage:
metrics = self._construct_metrics(
prompt_tokens=chunk.usage.prompt_tokens,
completion_tokens=chunk.usage.completion_tokens,
total_tokens=chunk.usage.total_tokens,
fully_qualified_model_id=fully_qualified_model_id,
provider_id=provider_id,
)
for metric in metrics:
enqueue_event(metric)
yield chunk
finally:
# Store the final assembled completion


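The deleted `_construct_metrics` path hand-rolled `prompt_tokens`/`completion_tokens`/`total_tokens` events; token usage is now emitted by automatic instrumentation as the GenAI histogram that surfaces in Prometheus as `llama_stack_gen_ai_client_token_usage_sum`. For comparison, a sketch of recording the equivalent data through the plain OTel metrics API (the instrument and attribute names here are my reading of the dashboard labels, not code from this PR):

```python
from opentelemetry import metrics

meter = metrics.get_meter(__name__)
token_usage = meter.create_histogram(
    name="gen_ai.client.token.usage",  # assumed semconv instrument name
    unit="{token}",
    description="Tokens consumed per request",
)

def record_usage(usage, model: str) -> None:
    # usage mirrors an OpenAI-style usage object with token counts
    attrs = {"gen_ai.request.model": model}
    token_usage.record(usage.prompt_tokens, {**attrs, "gen_ai.token.type": "input"})
    token_usage.record(usage.completion_tokens, {**attrs, "gen_ai.token.type": "output"})
```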
@@ -6,11 +6,15 @@
from typing import Any
from opentelemetry import trace
from llama_stack.core.datatypes import SafetyConfig
from llama_stack.log import get_logger
from llama_stack.telemetry.helpers import safety_request_span_attributes, safety_span_name
from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
logger = get_logger(name=__name__, category="core::routers")
tracer = trace.get_tracer(__name__)
class SafetyRouter(Safety):
@@ -51,14 +55,18 @@ class SafetyRouter(Safety):
messages: list[OpenAIMessageParam],
params: dict[str, Any] = None,
) -> RunShieldResponse:
with tracer.start_as_current_span(name=safety_span_name(shield_id)):
logger.debug(f"SafetyRouter.run_shield: {shield_id}")
provider = await self.routing_table.get_provider_impl(shield_id)
return await provider.run_shield(
response = await provider.run_shield(
shield_id=shield_id,
messages=messages,
params=params,
)
safety_request_span_attributes(shield_id, messages, response)
return response
async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
list_shields_response = await self.routing_table.list_shields()
shields = list_shields_response.data


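This is the one place the PR adds manual instrumentation: a span wraps `run_shield`, and the new `llama_stack.telemetry.helpers` functions attach shield-specific attributes that no auto-instrumentation would know about. A sketch of what those helpers might look like, under the assumption they just compute a span name and set attributes on the current span (shapes and attribute keys are hypothetical):

```python
# Hypothetical shapes for the helpers referenced in the diff above.
from opentelemetry import trace

def safety_span_name(shield_id: str) -> str:
    return f"safety.run_shield {shield_id}"

def safety_request_span_attributes(shield_id, messages, response) -> None:
    span = trace.get_current_span()
    span.set_attribute("llama_stack.safety.shield_id", shield_id)
    span.set_attribute("llama_stack.safety.message_count", len(messages))
    span.set_attribute("llama_stack.safety.violation", response.violation is not None)
```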
@@ -50,8 +50,6 @@ from llama_stack.core.stack import (
cast_image_name_to_string,
replace_env_vars,
)
from llama_stack.core.telemetry import Telemetry
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT, setup_logger
from llama_stack.core.utils.config import redact_sensitive_fields
from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
from llama_stack.core.utils.context import preserve_contexts_async_generator
@@ -60,7 +58,6 @@ from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFo
from .auth import AuthenticationMiddleware
from .quota import QuotaMiddleware
from .tracing import TracingMiddleware
REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -263,7 +260,7 @@ def create_dynamic_typed_route(func: Any, method: str, route: str) -> Callable:
try:
if is_streaming:
context_vars = [CURRENT_TRACE_CONTEXT, PROVIDER_DATA_VAR]
context_vars = [PROVIDER_DATA_VAR]
if test_context_var is not None:
context_vars.append(test_context_var)
gen = preserve_contexts_async_generator(sse_generator(func(**kwargs)), context_vars)
@@ -441,9 +438,6 @@ def create_app() -> StackApp:
if cors_config:
app.add_middleware(CORSMiddleware, **cors_config.model_dump())
if config.telemetry.enabled:
setup_logger(Telemetry())
# Load external APIs if configured
external_apis = load_external_apis(config)
all_routes = get_all_api_routes(external_apis)
@@ -500,9 +494,6 @@ def create_app() -> StackApp:
app.exception_handler(RequestValidationError)(global_exception_handler)
app.exception_handler(Exception)(global_exception_handler)
if config.telemetry.enabled:
app.add_middleware(TracingMiddleware, impls=impls, external_apis=external_apis)
return app


@@ -1,80 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from aiohttp import hdrs
from llama_stack.core.external import ExternalApiSpec
from llama_stack.core.server.routes import find_matching_route, initialize_route_impls
from llama_stack.core.telemetry.tracing import end_trace, start_trace
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="core::server")
class TracingMiddleware:
def __init__(self, app, impls, external_apis: dict[str, ExternalApiSpec]):
self.app = app
self.impls = impls
self.external_apis = external_apis
# FastAPI built-in paths that should bypass custom routing
self.fastapi_paths = ("/docs", "/redoc", "/openapi.json", "/favicon.ico", "/static")
async def __call__(self, scope, receive, send):
if scope.get("type") == "lifespan":
return await self.app(scope, receive, send)
path = scope.get("path", "")
# Check if the path is a FastAPI built-in path
if path.startswith(self.fastapi_paths):
# Pass through to FastAPI's built-in handlers
logger.debug(f"Bypassing custom routing for FastAPI built-in path: {path}")
return await self.app(scope, receive, send)
if not hasattr(self, "route_impls"):
self.route_impls = initialize_route_impls(self.impls, self.external_apis)
try:
_, _, route_path, webmethod = find_matching_route(
scope.get("method", hdrs.METH_GET), path, self.route_impls
)
except ValueError:
# If no matching endpoint is found, pass through to FastAPI
logger.debug(f"No matching route found for path: {path}, falling back to FastAPI")
return await self.app(scope, receive, send)
# Log deprecation warning if route is deprecated
if getattr(webmethod, "deprecated", False):
logger.warning(
f"DEPRECATED ROUTE USED: {scope.get('method', 'GET')} {path} - "
f"This route is deprecated and may be removed in a future version. "
f"Please check the docs for the supported version."
)
trace_attributes = {"__location__": "server", "raw_path": path}
# Extract W3C trace context headers and store as trace attributes
headers = dict(scope.get("headers", []))
traceparent = headers.get(b"traceparent", b"").decode()
if traceparent:
trace_attributes["traceparent"] = traceparent
tracestate = headers.get(b"tracestate", b"").decode()
if tracestate:
trace_attributes["tracestate"] = tracestate
trace_path = webmethod.descriptive_name or route_path
trace_context = await start_trace(trace_path, trace_attributes)
async def send_with_trace_id(message):
if message["type"] == "http.response.start":
headers = message.get("headers", [])
headers.append([b"x-trace-id", str(trace_context.trace_id).encode()])
message["headers"] = headers
await send(message)
try:
return await self.app(scope, receive, send_with_trace_id)
finally:
await end_trace()


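The removed middleware parsed `traceparent`/`tracestate` headers by hand and threaded them through custom trace attributes; the standard W3C propagator (which the old telemetry module also imported) does this in one call, and ASGI auto-instrumentation applies it for every request:

```python
# One-call W3C trace-context extraction; the header value is illustrative.
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

carrier = {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}
ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
# ctx can now parent new spans: tracer.start_as_current_span("op", context=ctx)
```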
@@ -1,32 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .telemetry import Telemetry
from .trace_protocol import serialize_value, trace_protocol
from .tracing import (
CURRENT_TRACE_CONTEXT,
ROOT_SPAN_MARKERS,
end_trace,
enqueue_event,
get_current_span,
setup_logger,
span,
start_trace,
)
__all__ = [
"Telemetry",
"trace_protocol",
"serialize_value",
"CURRENT_TRACE_CONTEXT",
"ROOT_SPAN_MARKERS",
"end_trace",
"enqueue_event",
"get_current_span",
"setup_logger",
"span",
"start_trace",
]


@@ -1,629 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
from collections.abc import Mapping, Sequence
from datetime import datetime
from enum import Enum
from typing import (
Annotated,
Any,
Literal,
cast,
)
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from pydantic import BaseModel, Field
from llama_stack.log import get_logger
from llama_stack.models.llama.datatypes import Primitive
from llama_stack_api import json_schema_type, register_schema
ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
# Type alias for OpenTelemetry attribute values (excludes None)
AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
Attributes = Mapping[str, AttributeValue]
@json_schema_type
class SpanStatus(Enum):
"""The status of a span indicating whether it completed successfully or with an error.
:cvar OK: Span completed successfully without errors
:cvar ERROR: Span completed with an error or failure
"""
OK = "ok"
ERROR = "error"
@json_schema_type
class Span(BaseModel):
"""A span representing a single operation within a trace.
:param span_id: Unique identifier for the span
:param trace_id: Unique identifier for the trace this span belongs to
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
:param name: Human-readable name describing the operation this span represents
:param start_time: Timestamp when the operation began
:param end_time: (Optional) Timestamp when the operation finished, if completed
:param attributes: (Optional) Key-value pairs containing additional metadata about the span
"""
span_id: str
trace_id: str
parent_span_id: str | None = None
name: str
start_time: datetime
end_time: datetime | None = None
attributes: dict[str, Any] | None = Field(default_factory=lambda: {})
def set_attribute(self, key: str, value: Any):
if self.attributes is None:
self.attributes = {}
self.attributes[key] = value
@json_schema_type
class Trace(BaseModel):
"""A trace representing the complete execution path of a request across multiple operations.
:param trace_id: Unique identifier for the trace
:param root_span_id: Unique identifier for the root span that started this trace
:param start_time: Timestamp when the trace began
:param end_time: (Optional) Timestamp when the trace finished, if completed
"""
trace_id: str
root_span_id: str
start_time: datetime
end_time: datetime | None = None
@json_schema_type
class EventType(Enum):
"""The type of telemetry event being logged.
:cvar UNSTRUCTURED_LOG: A simple log message with severity level
:cvar STRUCTURED_LOG: A structured log event with typed payload data
:cvar METRIC: A metric measurement with value and unit
"""
UNSTRUCTURED_LOG = "unstructured_log"
STRUCTURED_LOG = "structured_log"
METRIC = "metric"
@json_schema_type
class LogSeverity(Enum):
"""The severity level of a log message.
:cvar VERBOSE: Detailed diagnostic information for troubleshooting
:cvar DEBUG: Debug information useful during development
:cvar INFO: General informational messages about normal operation
:cvar WARN: Warning messages about potentially problematic situations
:cvar ERROR: Error messages indicating failures that don't stop execution
:cvar CRITICAL: Critical error messages indicating severe failures
"""
VERBOSE = "verbose"
DEBUG = "debug"
INFO = "info"
WARN = "warn"
ERROR = "error"
CRITICAL = "critical"
class EventCommon(BaseModel):
"""Common fields shared by all telemetry events.
:param trace_id: Unique identifier for the trace this event belongs to
:param span_id: Unique identifier for the span this event belongs to
:param timestamp: Timestamp when the event occurred
:param attributes: (Optional) Key-value pairs containing additional metadata about the event
"""
trace_id: str
span_id: str
timestamp: datetime
attributes: dict[str, Primitive] | None = Field(default_factory=lambda: {})
@json_schema_type
class UnstructuredLogEvent(EventCommon):
"""An unstructured log event containing a simple text message.
:param type: Event type identifier set to UNSTRUCTURED_LOG
:param message: The log message text
:param severity: The severity level of the log message
"""
type: Literal[EventType.UNSTRUCTURED_LOG] = EventType.UNSTRUCTURED_LOG
message: str
severity: LogSeverity
@json_schema_type
class MetricEvent(EventCommon):
"""A metric event containing a measured value.
:param type: Event type identifier set to METRIC
:param metric: The name of the metric being measured
:param value: The numeric value of the metric measurement
:param unit: The unit of measurement for the metric value
"""
type: Literal[EventType.METRIC] = EventType.METRIC
metric: str # this would be an enum
value: int | float
unit: str
@json_schema_type
class StructuredLogType(Enum):
"""The type of structured log event payload.
:cvar SPAN_START: Event indicating the start of a new span
:cvar SPAN_END: Event indicating the completion of a span
"""
SPAN_START = "span_start"
SPAN_END = "span_end"
@json_schema_type
class SpanStartPayload(BaseModel):
"""Payload for a span start event.
:param type: Payload type identifier set to SPAN_START
:param name: Human-readable name describing the operation this span represents
:param parent_span_id: (Optional) Unique identifier for the parent span, if this is a child span
"""
type: Literal[StructuredLogType.SPAN_START] = StructuredLogType.SPAN_START
name: str
parent_span_id: str | None = None
@json_schema_type
class SpanEndPayload(BaseModel):
"""Payload for a span end event.
:param type: Payload type identifier set to SPAN_END
:param status: The final status of the span indicating success or failure
"""
type: Literal[StructuredLogType.SPAN_END] = StructuredLogType.SPAN_END
status: SpanStatus
StructuredLogPayload = Annotated[
SpanStartPayload | SpanEndPayload,
Field(discriminator="type"),
]
register_schema(StructuredLogPayload, name="StructuredLogPayload")
@json_schema_type
class StructuredLogEvent(EventCommon):
"""A structured log event containing typed payload data.
:param type: Event type identifier set to STRUCTURED_LOG
:param payload: The structured payload data for the log event
"""
type: Literal[EventType.STRUCTURED_LOG] = EventType.STRUCTURED_LOG
payload: StructuredLogPayload
Event = Annotated[
UnstructuredLogEvent | MetricEvent | StructuredLogEvent,
Field(discriminator="type"),
]
register_schema(Event, name="Event")
@json_schema_type
class EvalTrace(BaseModel):
"""A trace record for evaluation purposes.
:param session_id: Unique identifier for the evaluation session
:param step: The evaluation step or phase identifier
:param input: The input data for the evaluation
:param output: The actual output produced during evaluation
:param expected_output: The expected output for comparison during evaluation
"""
session_id: str
step: str
input: str
output: str
expected_output: str
@json_schema_type
class SpanWithStatus(Span):
"""A span that includes status information.
:param status: (Optional) The current status of the span
"""
status: SpanStatus | None = None
@json_schema_type
class QueryConditionOp(Enum):
"""Comparison operators for query conditions.
:cvar EQ: Equal to comparison
:cvar NE: Not equal to comparison
:cvar GT: Greater than comparison
:cvar LT: Less than comparison
"""
EQ = "eq"
NE = "ne"
GT = "gt"
LT = "lt"
@json_schema_type
class QueryCondition(BaseModel):
"""A condition for filtering query results.
:param key: The attribute key to filter on
:param op: The comparison operator to apply
:param value: The value to compare against
"""
key: str
op: QueryConditionOp
value: Any
class QueryTracesResponse(BaseModel):
"""Response containing a list of traces.
:param data: List of traces matching the query criteria
"""
data: list[Trace]
class QuerySpansResponse(BaseModel):
"""Response containing a list of spans.
:param data: List of spans matching the query criteria
"""
data: list[Span]
class QuerySpanTreeResponse(BaseModel):
"""Response containing a tree structure of spans.
:param data: Dictionary mapping span IDs to spans with status information
"""
data: dict[str, SpanWithStatus]
class MetricQueryType(Enum):
"""The type of metric query to perform.
:cvar RANGE: Query metrics over a time range
:cvar INSTANT: Query metrics at a specific point in time
"""
RANGE = "range"
INSTANT = "instant"
class MetricLabelOperator(Enum):
"""Operators for matching metric labels.
:cvar EQUALS: Label value must equal the specified value
:cvar NOT_EQUALS: Label value must not equal the specified value
:cvar REGEX_MATCH: Label value must match the specified regular expression
:cvar REGEX_NOT_MATCH: Label value must not match the specified regular expression
"""
EQUALS = "="
NOT_EQUALS = "!="
REGEX_MATCH = "=~"
REGEX_NOT_MATCH = "!~"
class MetricLabelMatcher(BaseModel):
"""A matcher for filtering metrics by label values.
:param name: The name of the label to match
:param value: The value to match against
:param operator: The comparison operator to use for matching
"""
name: str
value: str
operator: MetricLabelOperator = MetricLabelOperator.EQUALS
@json_schema_type
class MetricLabel(BaseModel):
"""A label associated with a metric.
:param name: The name of the label
:param value: The value of the label
"""
name: str
value: str
@json_schema_type
class MetricDataPoint(BaseModel):
"""A single data point in a metric time series.
:param timestamp: Unix timestamp when the metric value was recorded
:param value: The numeric value of the metric at this timestamp
"""
timestamp: int
value: float
unit: str
@json_schema_type
class MetricSeries(BaseModel):
"""A time series of metric data points.
:param metric: The name of the metric
:param labels: List of labels associated with this metric series
:param values: List of data points in chronological order
"""
metric: str
labels: list[MetricLabel]
values: list[MetricDataPoint]
class QueryMetricsResponse(BaseModel):
"""Response containing metric time series data.
:param data: List of metric series matching the query criteria
"""
data: list[MetricSeries]
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"active_spans": {},
"counters": {},
"gauges": {},
"up_down_counters": {},
"histograms": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")
def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
"""Remove None values from attributes dict to match OpenTelemetry's expected type."""
if attrs is None:
return None
return {k: v for k, v in attrs.items() if v is not None}
def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span:
return span.is_recording()
class Telemetry:
def __init__(self) -> None:
self.meter = None
global _TRACER_PROVIDER
# Initialize the correct span processor based on the provider state.
# This is needed since once the span processor is set, it cannot be unset.
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
# Since the library client can be recreated multiple times in a notebook,
# the kernel will hold on to the span processor and cause duplicate spans to be written.
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if _TRACER_PROVIDER is None:
provider = TracerProvider()
trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider
# Use single OTLP endpoint for all telemetry signals
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor)
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
self.is_otel_endpoint_set = True
else:
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
self.is_otel_endpoint_set = False
self.meter = metrics.get_meter(__name__)
self._lock = _global_lock
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
if self.is_otel_endpoint_set:
cast(TracerProvider, trace.get_tracer_provider()).force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):
self._log_unstructured(event, ttl_seconds)
elif isinstance(event, MetricEvent):
self._log_metric(event)
elif isinstance(event, StructuredLogEvent):
self._log_structured(event, ttl_seconds)
else:
raise ValueError(f"Unknown event type: {event}")
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
# Use global storage instead of instance storage
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=event.type.value,
attributes={
"message": event.message,
"severity": event.severity.value,
"__ttl__": ttl_seconds,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
else:
print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["counters"]:
_GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
name=name,
unit=unit,
description=f"Counter for {name}",
)
return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name])
def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["gauges"]:
_GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
name=name,
unit=unit,
description=f"Gauge for {name}",
)
return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])
def _get_or_create_histogram(self, name: str, unit: str) -> metrics.Histogram:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["histograms"]:
_GLOBAL_STORAGE["histograms"][name] = self.meter.create_histogram(
name=name,
unit=unit,
description=f"Histogram for {name}",
)
return cast(metrics.Histogram, _GLOBAL_STORAGE["histograms"][name])
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
with self._lock:
# Only try to add to span if we have a valid span_id
if event.span_id:
try:
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=f"metric.{event.metric}",
attributes={
"value": event.value,
"unit": event.unit,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
except (ValueError, KeyError):
# Invalid span_id or span not found, but we already logged to console above
pass
except Exception:
# Lock acquisition failed
logger.debug("Failed to acquire lock to add metric to span")
# Log to OpenTelemetry meter if available
if self.meter is None:
return
# Use histograms for token-related metrics (per-request measurements)
# Use counters for other cumulative metrics
token_metrics = {"prompt_tokens", "completion_tokens", "total_tokens"}
if event.metric in token_metrics:
# Token metrics are per-request measurements, use histogram
histogram = self._get_or_create_histogram(event.metric, event.unit)
histogram.record(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=_clean_attributes(event.attributes))
elif isinstance(event.value, float):
up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes))
def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["up_down_counters"]:
_GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
name=name,
unit=unit,
description=f"UpDownCounter for {name}",
)
return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name])
def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
span_id = int(event.span_id, 16)
tracer = trace.get_tracer(__name__)
if event.attributes is None:
event.attributes = {}
event.attributes["__ttl__"] = ttl_seconds
# Extract these W3C trace context attributes so they are not written to
# underlying storage, as we just need them to propagate the trace context.
traceparent = event.attributes.pop("traceparent", None)
tracestate = event.attributes.pop("tracestate", None)
if traceparent:
# If we have a traceparent header value, we're not the root span.
for root_attribute in ROOT_SPAN_MARKERS:
event.attributes.pop(root_attribute, None)
if isinstance(event.payload, SpanStartPayload):
# Check if span already exists to prevent duplicates
if span_id in _GLOBAL_STORAGE["active_spans"]:
return
context = None
if event.payload.parent_span_id:
parent_span_id = int(event.payload.parent_span_id, 16)
parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
if parent_span:
context = trace.set_span_in_context(parent_span)
elif traceparent:
carrier = {
"traceparent": traceparent,
"tracestate": tracestate,
}
context = TraceContextTextMapPropagator().extract(carrier=carrier)
span = tracer.start_span(
name=event.payload.name,
context=context,
attributes=_clean_attributes(event.attributes),
)
_GLOBAL_STORAGE["active_spans"][span_id] = span
elif isinstance(event.payload, SpanEndPayload):
span = _GLOBAL_STORAGE["active_spans"].get(span_id) # type: ignore[assignment]
if span:
if event.attributes:
cleaned_attrs = _clean_attributes(event.attributes)
if cleaned_attrs:
span.set_attributes(cleaned_attrs)
status = (
trace.Status(status_code=trace.StatusCode.OK)
if event.payload.status == SpanStatus.OK
else trace.Status(status_code=trace.StatusCode.ERROR)
)
span.set_status(status)
span.end()
_GLOBAL_STORAGE["active_spans"].pop(span_id, None)
else:
raise ValueError(f"Unknown structured log event: {event}")


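Everything this 600-line module did by hand (provider setup, exporter wiring, span bookkeeping keyed on span IDs) now belongs to the externally configured SDK. After the migration, code that needs stack-specific telemetry just pulls a tracer or meter from the API; a minimal sketch, with placeholder span and attribute names:

```python
# No TracerProvider/MeterProvider wiring in-process; opentelemetry-instrument
# (or the operator's own SDK setup) owns that. Application code uses the API only.
from opentelemetry import metrics, trace

tracer = trace.get_tracer(__name__)
meter = metrics.get_meter(__name__)

with tracer.start_as_current_span("llama_stack.example_work") as span:  # placeholder name
    span.set_attribute("llama_stack.example", True)
```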
@@ -1,154 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
import inspect
import json
from collections.abc import AsyncGenerator, Callable
from functools import wraps
from typing import Any, cast
from pydantic import BaseModel
from llama_stack.models.llama.datatypes import Primitive
type JSONValue = Primitive | list["JSONValue"] | dict[str, "JSONValue"]
def serialize_value(value: Any) -> str:
return str(_prepare_for_json(value))
def _prepare_for_json(value: Any) -> JSONValue:
"""Serialize a single value into JSON-compatible format."""
if value is None:
return ""
elif isinstance(value, str | int | float | bool):
return value
elif hasattr(value, "_name_"):
return cast(str, value._name_)
elif isinstance(value, BaseModel):
return cast(JSONValue, json.loads(value.model_dump_json()))
elif isinstance(value, list | tuple | set):
return [_prepare_for_json(item) for item in value]
elif isinstance(value, dict):
return {str(k): _prepare_for_json(v) for k, v in value.items()}
else:
try:
json.dumps(value)
return cast(JSONValue, value)
except Exception:
return str(value)
def trace_protocol[T: type[Any]](cls: T) -> T:
"""
A class decorator that automatically traces all methods in a protocol/base class
and its inheriting classes.
"""
def trace_method(method: Callable[..., Any]) -> Callable[..., Any]:
is_async = asyncio.iscoroutinefunction(method)
is_async_gen = inspect.isasyncgenfunction(method)
def create_span_context(self: Any, *args: Any, **kwargs: Any) -> tuple[str, str, dict[str, Primitive]]:
class_name = self.__class__.__name__
method_name = method.__name__
span_type = "async_generator" if is_async_gen else "async" if is_async else "sync"
sig = inspect.signature(method)
param_names = list(sig.parameters.keys())[1:] # Skip 'self'
combined_args: dict[str, str] = {}
for i, arg in enumerate(args):
param_name = param_names[i] if i < len(param_names) else f"position_{i + 1}"
combined_args[param_name] = serialize_value(arg)
for k, v in kwargs.items():
combined_args[str(k)] = serialize_value(v)
span_attributes: dict[str, Primitive] = {
"__autotraced__": True,
"__class__": class_name,
"__method__": method_name,
"__type__": span_type,
"__args__": json.dumps(combined_args),
}
return class_name, method_name, span_attributes
@wraps(method)
async def async_gen_wrapper(self: Any, *args: Any, **kwargs: Any) -> AsyncGenerator[Any, None]:
from llama_stack.core.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
count = 0
try:
async for item in method(self, *args, **kwargs):
yield item
count += 1
finally:
span.set_attribute("chunk_count", count)
@wraps(method)
async def async_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
from llama_stack.core.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
try:
result = await method(self, *args, **kwargs)
span.set_attribute("output", serialize_value(result))
return result
except Exception as e:
span.set_attribute("error", str(e))
raise
@wraps(method)
def sync_wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
from llama_stack.core.telemetry import tracing
class_name, method_name, span_attributes = create_span_context(self, *args, **kwargs)
with tracing.span(f"{class_name}.{method_name}", span_attributes) as span:
try:
result = method(self, *args, **kwargs)
span.set_attribute("output", serialize_value(result))
return result
except Exception as e:
span.set_attribute("error", str(e))
raise
if is_async_gen:
return async_gen_wrapper
elif is_async:
return async_wrapper
else:
return sync_wrapper
# Wrap methods on the class itself (for classes applied at runtime)
# Skip if already wrapped (indicated by __wrapped__ attribute)
for name, method in vars(cls).items():
if inspect.isfunction(method) and not name.startswith("_"):
if not hasattr(method, "__wrapped__"):
wrapped = trace_method(method)
setattr(cls, name, wrapped) # noqa: B010
# Also set up __init_subclass__ for future subclasses
original_init_subclass = cast(Callable[..., Any] | None, getattr(cls, "__init_subclass__", None))
def __init_subclass__(cls_child: type[Any], **kwargs: Any) -> None: # noqa: N807
if original_init_subclass:
cast(Callable[..., None], original_init_subclass)(**kwargs)
for name, method in vars(cls_child).items():
if inspect.isfunction(method) and not name.startswith("_"):
setattr(cls_child, name, trace_method(method)) # noqa: B010
cls_any = cast(Any, cls)
cls_any.__init_subclass__ = classmethod(__init_subclass__)
return cls
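
The removed decorator hand-rolled one span per method call with the arguments attached as attributes. For reference, the upstream API already covers the simple case: in recent opentelemetry-api releases, `start_as_current_span` doubles as a decorator. A minimal sketch with illustrative names:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

# start_as_current_span returns a context manager that also works as a
# decorator, creating a fresh span for every call.
@tracer.start_as_current_span("MyProtocol.my_method")
def my_method(x: int) -> int:
    # Attributes still have to be attached by hand.
    trace.get_current_span().set_attribute("x", x)
    return x * 2
```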

View file

@ -1,388 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
import contextvars
import logging # allow-direct-logging
import queue
import secrets
import sys
import threading
import time
from collections.abc import Callable
from datetime import UTC, datetime
from functools import wraps
from typing import Any, Self
from llama_stack.core.telemetry.telemetry import (
ROOT_SPAN_MARKERS,
Event,
LogSeverity,
Span,
SpanEndPayload,
SpanStartPayload,
SpanStatus,
StructuredLogEvent,
Telemetry,
UnstructuredLogEvent,
)
from llama_stack.core.telemetry.trace_protocol import serialize_value
from llama_stack.log import get_logger
logger = get_logger(__name__, category="core")
# Fallback logger that does NOT propagate to TelemetryHandler to avoid recursion
_fallback_logger = logging.getLogger("llama_stack.telemetry.background")
if not _fallback_logger.handlers:
_fallback_logger.propagate = False
_fallback_logger.setLevel(logging.ERROR)
_fallback_handler = logging.StreamHandler(sys.stderr)
_fallback_handler.setLevel(logging.ERROR)
_fallback_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(name)s: %(message)s"))
_fallback_logger.addHandler(_fallback_handler)
INVALID_SPAN_ID = 0x0000000000000000
INVALID_TRACE_ID = 0x00000000000000000000000000000000
# The logical root span may not be visible to this process if a parent context
# is passed in. The local root span is the first local span in a trace.
LOCAL_ROOT_SPAN_MARKER = "__local_root_span__"
def trace_id_to_str(trace_id: int) -> str:
"""Convenience trace ID formatting method
Args:
trace_id: Trace ID int
Returns:
The trace ID as a 32-character hexadecimal string
"""
return format(trace_id, "032x")
def span_id_to_str(span_id: int) -> str:
"""Convenience span ID formatting method
Args:
span_id: Span ID int
Returns:
The span ID as a 16-character hexadecimal string
"""
return format(span_id, "016x")
def generate_span_id() -> str:
span_id = secrets.randbits(64)
while span_id == INVALID_SPAN_ID:
span_id = secrets.randbits(64)
return span_id_to_str(span_id)
def generate_trace_id() -> str:
trace_id = secrets.randbits(128)
while trace_id == INVALID_TRACE_ID:
trace_id = secrets.randbits(128)
return trace_id_to_str(trace_id)
LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS = 60.0
class BackgroundLogger:
def __init__(self, api: Telemetry, capacity: int = 100000):
self.api = api
self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
self.worker_thread = threading.Thread(target=self._worker, daemon=True)
self.worker_thread.start()
self._last_queue_full_log_time: float = 0.0
self._dropped_since_last_notice: int = 0
def log_event(self, event: Event) -> None:
try:
self.log_queue.put_nowait(event)
except queue.Full:
# Aggregate drops and emit at most once per interval via fallback logger
self._dropped_since_last_notice += 1
current_time = time.time()
if current_time - self._last_queue_full_log_time >= LOG_QUEUE_FULL_LOG_INTERVAL_SECONDS:
_fallback_logger.error(
"Log queue is full; dropped %d events since last notice",
self._dropped_since_last_notice,
)
self._last_queue_full_log_time = current_time
self._dropped_since_last_notice = 0
def _worker(self):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(self._process_logs())
async def _process_logs(self):
while True:
try:
event = self.log_queue.get()
await self.api.log_event(event)
except Exception:
import traceback
traceback.print_exc()
print("Error processing log event")
finally:
self.log_queue.task_done()
def __del__(self) -> None:
self.log_queue.join()
BACKGROUND_LOGGER: BackgroundLogger | None = None
def enqueue_event(event: Event) -> None:
"""Enqueue a telemetry event to the background logger if available.
This provides a non-blocking path for routers and other hot paths to
submit telemetry without awaiting the Telemetry API, reducing contention
with the main event loop.
"""
global BACKGROUND_LOGGER
if BACKGROUND_LOGGER is None:
raise RuntimeError("Telemetry API not initialized")
BACKGROUND_LOGGER.log_event(event)
class TraceContext:
def __init__(self, logger: BackgroundLogger, trace_id: str):
self.logger = logger
self.trace_id = trace_id
self.spans: list[Span] = []
def push_span(self, name: str, attributes: dict[str, Any] | None = None) -> Span:
current_span = self.get_current_span()
span = Span(
span_id=generate_span_id(),
trace_id=self.trace_id,
name=name,
start_time=datetime.now(UTC),
parent_span_id=current_span.span_id if current_span else None,
attributes=attributes,
)
self.logger.log_event(
StructuredLogEvent(
trace_id=span.trace_id,
span_id=span.span_id,
timestamp=span.start_time,
attributes=span.attributes,
payload=SpanStartPayload(
name=span.name,
parent_span_id=span.parent_span_id,
),
)
)
self.spans.append(span)
return span
def pop_span(self, status: SpanStatus = SpanStatus.OK) -> None:
span = self.spans.pop()
if span is not None:
self.logger.log_event(
StructuredLogEvent(
trace_id=span.trace_id,
span_id=span.span_id,
timestamp=span.start_time,
attributes=span.attributes,
payload=SpanEndPayload(
status=status,
),
)
)
def get_current_span(self) -> Span | None:
return self.spans[-1] if self.spans else None
CURRENT_TRACE_CONTEXT: contextvars.ContextVar[TraceContext | None] = contextvars.ContextVar(
"trace_context", default=None
)
def setup_logger(api: Telemetry, level: int = logging.INFO):
global BACKGROUND_LOGGER
if BACKGROUND_LOGGER is None:
BACKGROUND_LOGGER = BackgroundLogger(api)
root_logger = logging.getLogger()
root_logger.setLevel(level)
root_logger.addHandler(TelemetryHandler())
async def start_trace(name: str, attributes: dict[str, Any] | None = None) -> TraceContext | None:
global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
if BACKGROUND_LOGGER is None:
logger.debug("No Telemetry implementation set. Skipping trace initialization...")
return None
trace_id = generate_trace_id()
context = TraceContext(BACKGROUND_LOGGER, trace_id)
# Mark this span as the root for the trace for now. The processing of
# traceparent context if supplied comes later and will result in the
# ROOT_SPAN_MARKERS being removed. Also mark this is the 'local' root,
# i.e. the root of the spans originating in this process as this is
# needed to ensure that we insert this 'local' root span's id into
# the trace record in sqlite store.
attributes = dict.fromkeys(ROOT_SPAN_MARKERS, True) | {LOCAL_ROOT_SPAN_MARKER: True} | (attributes or {})
context.push_span(name, attributes)
CURRENT_TRACE_CONTEXT.set(context)
return context
async def end_trace(status: SpanStatus = SpanStatus.OK):
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if context is None:
logger.debug("No trace context to end")
return
context.pop_span(status)
CURRENT_TRACE_CONTEXT.set(None)
def severity(levelname: str) -> LogSeverity:
if levelname == "DEBUG":
return LogSeverity.DEBUG
elif levelname == "INFO":
return LogSeverity.INFO
elif levelname == "WARNING":
return LogSeverity.WARN
elif levelname == "ERROR":
return LogSeverity.ERROR
elif levelname == "CRITICAL":
return LogSeverity.CRITICAL
else:
raise ValueError(f"Unknown log level: {levelname}")
# TODO: ideally, the actual emitting should be done inside a separate daemon
# process completely isolated from the server
class TelemetryHandler(logging.Handler):
def emit(self, record: logging.LogRecord) -> None:
# horrendous hack to avoid logging from asyncio and getting into an infinite loop
if record.module in ("asyncio", "selector_events"):
return
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if context is None:
return
span = context.get_current_span()
if span is None:
return
enqueue_event(
UnstructuredLogEvent(
trace_id=span.trace_id,
span_id=span.span_id,
timestamp=datetime.now(UTC),
message=self.format(record),
severity=severity(record.levelname),
)
)
def close(self) -> None:
pass
class SpanContextManager:
def __init__(self, name: str, attributes: dict[str, Any] | None = None):
self.name = name
self.attributes = attributes
self.span: Span | None = None
def __enter__(self) -> Self:
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if not context:
logger.debug("No trace context to push span")
return self
self.span = context.push_span(self.name, self.attributes)
return self
def __exit__(self, exc_type, exc_value, traceback) -> None:
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if not context:
logger.debug("No trace context to pop span")
return
context.pop_span()
def set_attribute(self, key: str, value: Any) -> None:
if self.span:
if self.span.attributes is None:
self.span.attributes = {}
self.span.attributes[key] = serialize_value(value)
async def __aenter__(self) -> Self:
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if not context:
logger.debug("No trace context to push span")
return self
self.span = context.push_span(self.name, self.attributes)
return self
async def __aexit__(self, exc_type, exc_value, traceback) -> None:
global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if not context:
logger.debug("No trace context to pop span")
return
context.pop_span()
def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
@wraps(func)
def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
with self:
return func(*args, **kwargs)
@wraps(func)
async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
async with self:
return await func(*args, **kwargs)
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
if asyncio.iscoroutinefunction(func):
return async_wrapper(*args, **kwargs)
else:
return sync_wrapper(*args, **kwargs)
return wrapper
def span(name: str, attributes: dict[str, Any] | None = None) -> SpanContextManager:
return SpanContextManager(name, attributes)
def get_current_span() -> Span | None:
global CURRENT_TRACE_CONTEXT
if CURRENT_TRACE_CONTEXT is None:
logger.debug("No trace context to get current span")
return None
context = CURRENT_TRACE_CONTEXT.get()
if context:
return context.get_current_span()
return None
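
Worth noting: the bounded queue drained by a daemon worker thread that this module implemented by hand is essentially what the OpenTelemetry SDK's `BatchSpanProcessor` already provides, which is part of why the custom path could be deleted. A minimal sketch of the SDK-native setup (the endpoint value is illustrative):

```python
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# BatchSpanProcessor buffers finished spans in a bounded queue and exports
# them from a background thread, so the hot path never blocks on the exporter
# and overflow is dropped rather than backing up the server.
provider = TracerProvider()
provider.add_span_processor(
    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
)
trace.set_tracer_provider(provider)
```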

View file

@ -7,8 +7,6 @@
from collections.abc import AsyncGenerator
from contextvars import ContextVar
from llama_stack.core.telemetry.tracing import CURRENT_TRACE_CONTEXT
_MISSING = object()
@ -69,16 +67,12 @@ def preserve_contexts_async_generator[T](
try:
yield item
# Update our tracked values with any changes made during this iteration
# Only for non-trace context vars - trace context must persist across yields
# to allow nested span tracking for telemetry
# This allows context changes to persist across generator iterations
for context_var in context_vars:
if context_var is not CURRENT_TRACE_CONTEXT:
initial_context_values[context_var.name] = context_var.get()
finally:
# Restore non-trace context vars after each yield to prevent leaks between requests
# CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
# Restore context vars after each yield to prevent leaks between requests
for context_var in context_vars:
if context_var is not CURRENT_TRACE_CONTEXT:
_restore_context_var(context_var)
return wrapper()

View file

@ -281,8 +281,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -272,8 +272,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -140,5 +140,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -131,5 +131,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -153,5 +153,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -138,5 +138,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -135,5 +135,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -114,5 +114,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -132,5 +132,3 @@ registered_resources:
provider_id: tavily-search
server:
port: 8321
telemetry:
enabled: true

View file

@ -251,5 +251,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -114,5 +114,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -284,8 +284,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -275,8 +275,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -281,8 +281,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -272,8 +272,6 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:

View file

@ -24,7 +24,6 @@ from llama_stack.core.datatypes import (
Provider,
SafetyConfig,
ShieldInput,
TelemetryConfig,
ToolGroupInput,
VectorStoresConfig,
)
@ -189,7 +188,6 @@ class RunConfigSettings(BaseModel):
default_benchmarks: list[BenchmarkInput] | None = None
vector_stores_config: VectorStoresConfig | None = None
safety_config: SafetyConfig | None = None
telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True))
storage_backends: dict[str, Any] | None = None
storage_stores: dict[str, Any] | None = None
@ -289,7 +287,6 @@ class RunConfigSettings(BaseModel):
"server": {
"port": 8321,
},
"telemetry": self.telemetry.model_dump(exclude_none=True) if self.telemetry else None,
}
if self.vector_stores_config:

View file

@ -132,5 +132,3 @@ registered_resources:
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -37,7 +37,6 @@ CATEGORIES = [
"eval",
"tools",
"client",
"telemetry",
"openai",
"openai_responses",
"openai_conversations",

View file

@ -15,7 +15,6 @@ async def get_provider_impl(
config: MetaReferenceAgentsImplConfig,
deps: dict[Api, Any],
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
from .agents import MetaReferenceAgentsImpl
@ -29,7 +28,6 @@ async def get_provider_impl(
deps[Api.conversations],
deps[Api.prompts],
deps[Api.files],
telemetry_enabled,
policy,
)
await impl.initialize()

View file

@ -50,7 +50,6 @@ class MetaReferenceAgentsImpl(Agents):
prompts_api: Prompts,
files_api: Files,
policy: list[AccessRule],
telemetry_enabled: bool = False,
):
self.config = config
self.inference_api = inference_api
@ -59,7 +58,6 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_runtime_api = tool_runtime_api
self.tool_groups_api = tool_groups_api
self.conversations_api = conversations_api
self.telemetry_enabled = telemetry_enabled
self.prompts_api = prompts_api
self.files_api = files_api
self.in_memory_store = InmemoryKVStoreImpl()

View file

@ -8,7 +8,8 @@ import uuid
from collections.abc import AsyncIterator
from typing import Any
from llama_stack.core.telemetry import tracing
from opentelemetry import trace
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
from llama_stack_api import (
@ -79,6 +80,7 @@ from .utils import (
)
logger = get_logger(name=__name__, category="agents::meta_reference")
tracer = trace.get_tracer(__name__)
def convert_tooldef_to_chat_tool(tool_def):
@ -1106,8 +1108,10 @@ class StreamingResponseOrchestrator:
"server_url": mcp_tool.server_url,
"mcp_list_tools_id": list_id,
}
# List MCP tools with authorization from tool config
async with tracing.span("list_mcp_tools", attributes):
# TODO: follow semantic conventions for OpenTelemetry tool spans
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
tool_defs = await list_mcp_tools(
endpoint=mcp_tool.server_url,
headers=mcp_tool.headers,
@ -1183,9 +1187,9 @@ class StreamingResponseOrchestrator:
if mcp_server.require_approval == "never":
return False
if isinstance(mcp_server, ApprovalFilter):
if tool_name in mcp_server.always:
if mcp_server.always and tool_name in mcp_server.always:
return True
if tool_name in mcp_server.never:
if mcp_server.never and tool_name in mcp_server.never:
return False
return True

View file

@ -9,7 +9,8 @@ import json
from collections.abc import AsyncIterator
from typing import Any
from llama_stack.core.telemetry import tracing
from opentelemetry import trace
from llama_stack.log import get_logger
from llama_stack_api import (
ImageContentItem,
@ -42,6 +43,7 @@ from llama_stack_api import (
from .types import ChatCompletionContext, ToolExecutionResult
logger = get_logger(name=__name__, category="agents::meta_reference")
tracer = trace.get_tracer(__name__)
class ToolExecutor:
@ -296,8 +298,9 @@ class ToolExecutor:
"server_url": mcp_tool.server_url,
"tool_name": function_name,
}
# Invoke MCP tool with authorization from tool config
async with tracing.span("invoke_mcp_tool", attributes):
# TODO: follow semantic conventions for OpenTelemetry tool spans
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
result = await invoke_mcp_tool(
endpoint=mcp_tool.server_url,
tool_name=function_name,
@ -318,7 +321,7 @@ class ToolExecutor:
# Use vector_stores.search API instead of knowledge_search tool
# to support filters and ranking_options
query = tool_kwargs.get("query", "")
async with tracing.span("knowledge_search", {}):
with tracer.start_as_current_span("knowledge_search"):
result = await self._execute_knowledge_search_via_vector_store(
query=query,
response_file_search_tool=response_file_search_tool,
@ -327,7 +330,9 @@ class ToolExecutor:
attributes = {
"tool_name": function_name,
}
async with tracing.span("invoke_tool", attributes):
# TODO: follow semantic conventions for OpenTelemetry tool spans
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
with tracer.start_as_current_span("invoke_tool", attributes=attributes):
result = await self.tool_runtime_api.invoke_tool(
tool_name=function_name,
kwargs=tool_kwargs,
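
For reference, the execute-tool span shape that these TODOs link to would look roughly like the sketch below. The gen-ai semantic conventions are still experimental, so the span name format and attribute keys may change:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

async def invoke_tool_with_semconv_span(tool_name: str):
    # Per the gen-ai semconv draft, execute-tool spans are named
    # "execute_tool {tool name}" and carry gen_ai.* attributes.
    with tracer.start_as_current_span(
        f"execute_tool {tool_name}",
        attributes={
            "gen_ai.operation.name": "execute_tool",
            "gen_ai.tool.name": tool_name,
        },
    ):
        ...  # invoke the tool and record its result on the span
```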

View file

@ -6,7 +6,6 @@
import asyncio
from llama_stack.core.telemetry import tracing
from llama_stack.log import get_logger
from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
@ -31,15 +30,12 @@ class ShieldRunnerMixin:
self.output_shields = output_shields
async def run_multiple_shields(self, messages: list[OpenAIMessageParam], identifiers: list[str]) -> None:
async def run_shield_with_span(identifier: str):
async with tracing.span(f"run_shield_{identifier}"):
return await self.safety_api.run_shield(
shield_id=identifier,
messages=messages,
params={},
responses = await asyncio.gather(
*[
self.safety_api.run_shield(shield_id=identifier, messages=messages, params={})
for identifier in identifiers
]
)
responses = await asyncio.gather(*[run_shield_with_span(identifier) for identifier in identifiers])
for identifier, response in zip(identifiers, responses, strict=False):
if not response.violation:
continue

View file

@ -8,7 +8,6 @@ from collections.abc import AsyncIterator, Iterable
from openai import AuthenticationError
from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import (
@ -84,7 +83,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
"""Override to enable streaming usage metrics and handle authentication errors."""
# Enable streaming usage metrics when telemetry is active
if params.stream and get_current_span() is not None:
if params.stream:
if params.stream_options is None:
params.stream_options = {"include_usage": True}
elif "include_usage" not in params.stream_options:

View file

@ -10,7 +10,6 @@ from typing import Any
import litellm
import requests
from llama_stack.core.telemetry.tracing import get_current_span
from llama_stack.log import get_logger
from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@ -59,7 +58,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
# Add usage tracking for streaming when telemetry is active
stream_options = params.stream_options
if params.stream and get_current_span() is not None:
if params.stream:
if stream_options is None:
stream_options = {"include_usage": True}
elif "include_usage" not in stream_options:

View file

@ -217,10 +217,9 @@ class LiteLLMOpenAIMixin(
params: OpenAIChatCompletionRequestWithExtraBody,
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
# Add usage tracking for streaming when telemetry is active
from llama_stack.core.telemetry.tracing import get_current_span
stream_options = params.stream_options
if params.stream and get_current_span() is not None:
if params.stream:
if stream_options is None:
stream_options = {"include_usage": True}
elif "include_usage" not in stream_options:

View file

@ -89,6 +89,7 @@ async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerat
# sse_client and streamablehttp_client have different signatures, but both
# are called the same way here, so we cast to Any to avoid type errors
client = cast(Any, sse_client)
async with client(endpoint, headers=headers) as client_streams:
async with ClientSession(read_stream=client_streams[0], write_stream=client_streams[1]) as session:
await session.initialize()

View file

@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

View file

@ -0,0 +1,27 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
This file contains constants used for naming data captured for telemetry.
Consistent naming ensures that the captured data can be identified and correlated.
If custom telemetry data is added to Llama Stack, please add constants for it
here.
"""
llama_stack_prefix = "llama_stack"
# Safety Attributes
RUN_SHIELD_OPERATION_NAME = "run_shield"
SAFETY_REQUEST_PREFIX = f"{llama_stack_prefix}.safety.request"
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.shield_id"
SAFETY_REQUEST_MESSAGES_ATTRIBUTE = f"{SAFETY_REQUEST_PREFIX}.messages"
SAFETY_RESPONSE_PREFIX = f"{llama_stack_prefix}.safety.response"
SAFETY_RESPONSE_METADATA_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.metadata"
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.level"
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE = f"{SAFETY_RESPONSE_PREFIX}.violation.user_message"

View file

@ -0,0 +1,43 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
from opentelemetry import trace
from llama_stack_api import OpenAIMessageParam, RunShieldResponse
from .constants import (
RUN_SHIELD_OPERATION_NAME,
SAFETY_REQUEST_MESSAGES_ATTRIBUTE,
SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE,
SAFETY_RESPONSE_METADATA_ATTRIBUTE,
SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE,
SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE,
)
def safety_span_name(shield_id: str) -> str:
return f"{RUN_SHIELD_OPERATION_NAME} {shield_id}"
# TODO: Consider using wrapt to automatically instrument code.
# This is the industry-standard way to package automatic instrumentation in Python.
def safety_request_span_attributes(
shield_id: str, messages: list[OpenAIMessageParam], response: RunShieldResponse
) -> None:
span = trace.get_current_span()
span.set_attribute(SAFETY_REQUEST_SHIELD_ID_ATTRIBUTE, shield_id)
messages_json = json.dumps([msg.model_dump() for msg in messages])
span.set_attribute(SAFETY_REQUEST_MESSAGES_ATTRIBUTE, messages_json)
if response.violation:
if response.violation.metadata:
metadata_json = json.dumps(response.violation.metadata)
span.set_attribute(SAFETY_RESPONSE_METADATA_ATTRIBUTE, metadata_json)
if response.violation.user_message:
span.set_attribute(SAFETY_RESPONSE_USER_MESSAGE_ATTRIBUTE, response.violation.user_message)
span.set_attribute(SAFETY_RESPONSE_VIOLATION_LEVEL_ATTRIBUTE, response.violation.violation_level.value)
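
These helpers assume they run inside an active span named via `safety_span_name`. A hypothetical call site (the surrounding method and its `safety_api` reference are illustrative, not part of this diff) could look like:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

async def run_shield_with_telemetry(self, shield_id, messages):
    # Hypothetical wiring: open the conventionally named span, run the
    # shield, then annotate the span with request/response attributes.
    with tracer.start_as_current_span(safety_span_name(shield_id)):
        response = await self.safety_api.run_shield(
            shield_id=shield_id, messages=messages, params={}
        )
        safety_request_span_attributes(shield_id, messages, response)
        return response
```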

View file

@ -17,7 +17,6 @@ from unittest.mock import AsyncMock, patch
import pytest
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack_api import (
Api,
OpenAIAssistantMessageParam,
@ -27,10 +26,6 @@ from llama_stack_api import (
)
class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion):
metrics: list[MetricEvent] | None = None
def test_unregistered_model_routing_with_provider_data(client_with_models):
"""
Test that a model can be routed using provider_id/model_id format
@ -72,7 +67,7 @@ def test_unregistered_model_routing_with_provider_data(client_with_models):
# The inference router's routing_table.impls_by_provider_id should have anthropic
# Let's patch the anthropic provider's openai_chat_completion method
# to avoid making real API calls
mock_response = OpenAIChatCompletionWithMetrics(
mock_response = OpenAIChatCompletion(
id="chatcmpl-test-123",
created=1234567890,
model="claude-3-5-sonnet-20241022",

View file

@ -15,11 +15,10 @@ from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
import llama_stack.core.telemetry.telemetry as telemetry_module
from .base import BaseTelemetryCollector, MetricStub, SpanStub
# TODO: Fix this to work with Automatic Instrumentation
class InMemoryTelemetryCollector(BaseTelemetryCollector):
"""In-memory telemetry collector for library-client tests.
@ -75,13 +74,10 @@ class InMemoryTelemetryManager:
meter_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
telemetry_module._TRACER_PROVIDER = tracer_provider
self.collector = InMemoryTelemetryCollector(span_exporter, metric_reader)
self._tracer_provider = tracer_provider
self._meter_provider = meter_provider
def shutdown(self) -> None:
telemetry_module._TRACER_PROVIDER = None
self._tracer_provider.shutdown()
self._meter_provider.shutdown()

View file

@ -15,6 +15,7 @@ from tests.integration.fixtures.common import instantiate_llama_stack_client
from tests.integration.telemetry.collectors import InMemoryTelemetryManager, OtlpHttpTestCollector
# TODO: Fix this to work with Automatic Instrumentation
@pytest.fixture(scope="session")
def telemetry_test_collector():
stack_mode = os.environ.get("LLAMA_STACK_TEST_STACK_CONFIG_TYPE", "library_client")
@ -48,6 +49,7 @@ def telemetry_test_collector():
manager.shutdown()
# TODO: Fix this to work with Automatic Instrumentation
@pytest.fixture(scope="session")
def llama_stack_client(telemetry_test_collector, request):
"""Ensure telemetry collector is ready before initializing the stack client."""

View file

@ -155,9 +155,6 @@ def old_config():
provider_type: inline::meta-reference
config: {{}}
api_providers:
telemetry:
provider_type: noop
config: {{}}
"""
)
@ -181,7 +178,7 @@ def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config):
def test_parse_and_maybe_upgrade_config_old_format(old_config):
result = parse_and_maybe_upgrade_config(old_config)
assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION
assert all(api in result.providers for api in ["inference", "safety", "memory", "telemetry"])
assert all(api in result.providers for api in ["inference", "safety", "memory"])
safety_provider = result.providers["safety"][0]
assert safety_provider.provider_type == "inline::meta-reference"
assert "llama_guard_shield" in safety_provider.config

View file

@ -83,7 +83,7 @@ class TestProviderInitialization:
new_callable=AsyncMock,
):
# Should not raise any exception
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
provider = await get_provider_impl(config, mock_deps, policy=[])
assert provider is not None
async def test_initialization_without_safety_api(self, mock_persistence_config, mock_deps):
@ -97,7 +97,7 @@ class TestProviderInitialization:
new_callable=AsyncMock,
):
# Should not raise any exception
provider = await get_provider_impl(config, mock_deps, policy=[], telemetry_enabled=False)
provider = await get_provider_impl(config, mock_deps, policy=[])
assert provider is not None
assert provider.safety_api is None