mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-06 05:59:13 +00:00
feat: improve telemetry (#2590)
Some checks failed
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 4s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 8s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.12, agents) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 17s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 20s
Integration Tests / test-matrix (server, 3.12, datasets) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, inference) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 20s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 20s
Integration Tests / test-matrix (server, 3.12, inspect) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, post_training) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, providers) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, scoring) (push) Failing after 9s
Integration Tests / test-matrix (server, 3.12, vector_io) (push) Failing after 6s
Integration Tests / test-matrix (server, 3.13, datasets) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, inspect) (push) Failing after 8s
Integration Tests / test-matrix (server, 3.13, providers) (push) Failing after 6s
Integration Tests / test-matrix (server, 3.13, tool_runtime) (push) Failing after 5s
Integration Tests / test-matrix (server, 3.13, vector_io) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 4s
Integration Tests / test-matrix (server, 3.12, tool_runtime) (push) Failing after 18s
Integration Tests / test-matrix (server, 3.13, agents) (push) Failing after 19s
Integration Tests / test-matrix (server, 3.13, post_training) (push) Failing after 16s
Integration Tests / test-matrix (server, 3.13, inference) (push) Failing after 18s
Integration Tests / test-matrix (server, 3.13, scoring) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 7s
Test Llama Stack Build / generate-matrix (push) Successful in 3s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 15s
Python Package Build Test / build (3.13) (push) Failing after 0s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Test Llama Stack Build / build-single-provider (push) Failing after 6s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 17s
Update ReadTheDocs / update-readthedocs (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 4s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 5s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 58s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 1m0s
Python Package Build Test / build (3.12) (push) Failing after 49s
Pre-commit / pre-commit (push) Successful in 1m40s
Some checks failed
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 4s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 8s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.12, agents) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 17s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 20s
Integration Tests / test-matrix (server, 3.12, datasets) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, inference) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 20s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 20s
Integration Tests / test-matrix (server, 3.12, inspect) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, post_training) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, providers) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, scoring) (push) Failing after 9s
Integration Tests / test-matrix (server, 3.12, vector_io) (push) Failing after 6s
Integration Tests / test-matrix (server, 3.13, datasets) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, inspect) (push) Failing after 8s
Integration Tests / test-matrix (server, 3.13, providers) (push) Failing after 6s
Integration Tests / test-matrix (server, 3.13, tool_runtime) (push) Failing after 5s
Integration Tests / test-matrix (server, 3.13, vector_io) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 4s
Integration Tests / test-matrix (server, 3.12, tool_runtime) (push) Failing after 18s
Integration Tests / test-matrix (server, 3.13, agents) (push) Failing after 19s
Integration Tests / test-matrix (server, 3.13, post_training) (push) Failing after 16s
Integration Tests / test-matrix (server, 3.13, inference) (push) Failing after 18s
Integration Tests / test-matrix (server, 3.13, scoring) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 9s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 7s
Test Llama Stack Build / generate-matrix (push) Successful in 3s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 15s
Python Package Build Test / build (3.13) (push) Failing after 0s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Test Llama Stack Build / build-single-provider (push) Failing after 6s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 17s
Update ReadTheDocs / update-readthedocs (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 4s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 5s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 58s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 1m0s
Python Package Build Test / build (3.12) (push) Failing after 49s
Pre-commit / pre-commit (push) Successful in 1m40s
# What does this PR do? * Use a single env variable to setup OTEL endpoint * Update telemetry provider doc * Update general telemetry doc with the metric with generate * Left a script to setup telemetry for testing Closes: https://github.com/meta-llama/llama-stack/issues/783 Note to reviewer: the `setup_telemetry.sh` script was useful for me, it was nicely generated by AI, if we don't want it in the repo, and I can delete it, and I would understand. Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
parent
4eae0cbfa4
commit
ea966565f6
11 changed files with 237 additions and 38 deletions
|
@ -24,37 +24,106 @@ structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_s
|
|||
- **Spans**: Represent operations with timing and hierarchical relationships
|
||||
- **Traces**: Collection of related spans forming a complete request flow
|
||||
|
||||
### Metrics
|
||||
|
||||
Llama Stack automatically generates metrics during inference operations. These metrics are aggregated at the **inference request level** and provide insights into token usage and model performance.
|
||||
|
||||
#### Available Metrics
|
||||
|
||||
The following metrics are automatically generated for each inference request:
|
||||
|
||||
| Metric Name | Type | Unit | Description | Labels |
|
||||
|-------------|------|------|-------------|--------|
|
||||
| `llama_stack_prompt_tokens_total` | Counter | `tokens` | Number of tokens in the input prompt | `model_id`, `provider_id` |
|
||||
| `llama_stack_completion_tokens_total` | Counter | `tokens` | Number of tokens in the generated response | `model_id`, `provider_id` |
|
||||
| `llama_stack_tokens_total` | Counter | `tokens` | Total tokens used (prompt + completion) | `model_id`, `provider_id` |
|
||||
|
||||
#### Metric Generation Flow
|
||||
|
||||
1. **Token Counting**: During inference operations (chat completion, completion, etc.), the system counts tokens in both input prompts and generated responses
|
||||
2. **Metric Construction**: For each request, `MetricEvent` objects are created with the token counts
|
||||
3. **Telemetry Logging**: Metrics are sent to the configured telemetry sinks
|
||||
4. **OpenTelemetry Export**: When OpenTelemetry is enabled, metrics are exposed as standard OpenTelemetry counters
|
||||
|
||||
#### Metric Aggregation Level
|
||||
|
||||
All metrics are generated and aggregated at the **inference request level**. This means:
|
||||
|
||||
- Each individual inference request generates its own set of metrics
|
||||
- Metrics are not pre-aggregated across multiple requests
|
||||
- Aggregation (sums, averages, etc.) can be performed by your observability tools (Prometheus, Grafana, etc.)
|
||||
- Each metric includes labels for `model_id` and `provider_id` to enable filtering and grouping
|
||||
|
||||
#### Example Metric Event
|
||||
|
||||
```python
|
||||
MetricEvent(
|
||||
trace_id="1234567890abcdef",
|
||||
span_id="abcdef1234567890",
|
||||
metric="total_tokens",
|
||||
value=150,
|
||||
timestamp=1703123456.789,
|
||||
unit="tokens",
|
||||
attributes={"model_id": "meta-llama/Llama-3.2-3B-Instruct", "provider_id": "tgi"},
|
||||
)
|
||||
```
|
||||
|
||||
#### Querying Metrics
|
||||
|
||||
When using the OpenTelemetry sink, metrics are exposed in standard OpenTelemetry format and can be queried through:
|
||||
|
||||
- **Prometheus**: Scrape metrics from the OpenTelemetry Collector's metrics endpoint
|
||||
- **Grafana**: Create dashboards using Prometheus as a data source
|
||||
- **OpenTelemetry Collector**: Forward metrics to other observability systems
|
||||
|
||||
Example Prometheus queries:
|
||||
```promql
|
||||
# Total tokens used across all models
|
||||
sum(llama_stack_tokens_total)
|
||||
|
||||
# Tokens per model
|
||||
sum by (model_id) (llama_stack_tokens_total)
|
||||
|
||||
# Average tokens per request
|
||||
rate(llama_stack_tokens_total[5m])
|
||||
```
|
||||
|
||||
### Sinks
|
||||
- **OpenTelemetry**: Send events to an OpenTelemetry Collector. This is useful for visualizing traces in a tool like Jaeger.
|
||||
- **OpenTelemetry**: Send events to an OpenTelemetry Collector. This is useful for visualizing traces in a tool like Jaeger and collecting metrics for Prometheus.
|
||||
- **SQLite**: Store events in a local SQLite database. This is needed if you want to query the events later through the Llama Stack API.
|
||||
- **Console**: Print events to the console.
|
||||
|
||||
### Providers
|
||||
|
||||
#### Meta-Reference Provider
|
||||
Currently, only the meta-reference provider is implemented. It can be configured to send events to three sink types:
|
||||
1) OpenTelemetry Collector
|
||||
2) SQLite
|
||||
3) Console
|
||||
Currently, only the meta-reference provider is implemented. It can be configured to send events to multiple sink types:
|
||||
1) OpenTelemetry Collector (traces and metrics)
|
||||
2) SQLite (traces only)
|
||||
3) Console (all events)
|
||||
|
||||
#### Configuration
|
||||
|
||||
Here's an example that sends telemetry signals to all three sink types. Your configuration might use only one.
|
||||
Here's an example that sends telemetry signals to all sink types. Your configuration might use only one or a subset.
|
||||
|
||||
```yaml
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: "llama-stack-service"
|
||||
sinks: ['console', 'sqlite', 'otel_trace', 'otel_metric']
|
||||
otel_trace_endpoint: "http://localhost:4318/v1/traces"
|
||||
otel_metric_endpoint: "http://localhost:4318/v1/metrics"
|
||||
otel_exporter_otlp_endpoint: "http://localhost:4318"
|
||||
sqlite_db_path: "/path/to/telemetry.db"
|
||||
```
|
||||
|
||||
**Environment Variables:**
|
||||
- `OTEL_EXPORTER_OTLP_ENDPOINT`: OpenTelemetry Collector endpoint (default: `http://localhost:4318`)
|
||||
- `OTEL_SERVICE_NAME`: Service name for telemetry (default: empty string)
|
||||
- `TELEMETRY_SINKS`: Comma-separated list of sinks (default: `console,sqlite`)
|
||||
|
||||
### Jaeger to visualize traces
|
||||
|
||||
The `otel` sink works with any service compatible with the OpenTelemetry collector, traces and metrics has two separate endpoints.
|
||||
Let's use Jaeger to visualize this data.
|
||||
The `otel_trace` sink works with any service compatible with the OpenTelemetry collector. Traces and metrics use separate endpoints but can share the same collector.
|
||||
|
||||
Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command:
|
||||
|
||||
|
@ -68,4 +137,7 @@ Once the Jaeger instance is running, you can visualize traces by navigating to h
|
|||
|
||||
### Querying Traces Stored in SQLite
|
||||
|
||||
The `sqlite` sink allows you to query traces without an external system. Here are some example queries. Refer to the notebook at [Llama Stack Building AI Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for more examples on how to query traces and spaces.
|
||||
The `sqlite` sink allows you to query traces without an external system. Here are some example
|
||||
queries. Refer to the notebook at [Llama Stack Building AI
|
||||
Applications](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for
|
||||
more examples on how to query traces and spans.
|
||||
|
|
|
@ -8,10 +8,9 @@ Meta's reference implementation of telemetry and observability using OpenTelemet
|
|||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces |
|
||||
| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics |
|
||||
| `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. |
|
||||
| `service_name` | `<class 'str'>` | No | | The service name to use for telemetry |
|
||||
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel, sqlite, console) |
|
||||
| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) |
|
||||
| `sqlite_db_path` | `<class 'str'>` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces |
|
||||
|
||||
## Sample Configuration
|
||||
|
@ -20,6 +19,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
|
||||
```
|
||||
|
||||
|
|
|
@ -20,13 +20,9 @@ class TelemetrySink(StrEnum):
|
|||
|
||||
|
||||
class TelemetryConfig(BaseModel):
|
||||
otel_trace_endpoint: str | None = Field(
|
||||
otel_exporter_otlp_endpoint: str | None = Field(
|
||||
default=None,
|
||||
description="The OpenTelemetry collector endpoint URL for traces",
|
||||
)
|
||||
otel_metric_endpoint: str | None = Field(
|
||||
default=None,
|
||||
description="The OpenTelemetry collector endpoint URL for metrics",
|
||||
description="The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable.",
|
||||
)
|
||||
service_name: str = Field(
|
||||
# service name is always the same, use zero-width space to avoid clutter
|
||||
|
@ -35,7 +31,7 @@ class TelemetryConfig(BaseModel):
|
|||
)
|
||||
sinks: list[TelemetrySink] = Field(
|
||||
default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE],
|
||||
description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
|
||||
description="List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console)",
|
||||
)
|
||||
sqlite_db_path: str = Field(
|
||||
default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(),
|
||||
|
@ -55,4 +51,5 @@ class TelemetryConfig(BaseModel):
|
|||
"service_name": "${env.OTEL_SERVICE_NAME:=\u200b}",
|
||||
"sinks": "${env.TELEMETRY_SINKS:=console,sqlite}",
|
||||
"sqlite_db_path": "${env.SQLITE_STORE_DIR:=" + __distro_dir__ + "}/" + db_name,
|
||||
"otel_exporter_otlp_endpoint": "${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}",
|
||||
}
|
||||
|
|
|
@ -86,24 +86,27 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
|
|||
provider = TracerProvider(resource=resource)
|
||||
trace.set_tracer_provider(provider)
|
||||
_TRACER_PROVIDER = provider
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks:
|
||||
if self.config.otel_trace_endpoint is None:
|
||||
raise ValueError("otel_trace_endpoint is required when OTEL_TRACE is enabled")
|
||||
span_exporter = OTLPSpanExporter(
|
||||
endpoint=self.config.otel_trace_endpoint,
|
||||
)
|
||||
span_processor = BatchSpanProcessor(span_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
if self.config.otel_metric_endpoint is None:
|
||||
raise ValueError("otel_metric_endpoint is required when OTEL_METRIC is enabled")
|
||||
metric_reader = PeriodicExportingMetricReader(
|
||||
OTLPMetricExporter(
|
||||
endpoint=self.config.otel_metric_endpoint,
|
||||
|
||||
# Use single OTLP endpoint for all telemetry signals
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks or TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
if self.config.otel_exporter_otlp_endpoint is None:
|
||||
raise ValueError(
|
||||
"otel_exporter_otlp_endpoint is required when OTEL_TRACE or OTEL_METRIC is enabled"
|
||||
)
|
||||
)
|
||||
metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
# Let OpenTelemetry SDK handle endpoint construction automatically
|
||||
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
|
||||
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
|
||||
if TelemetrySink.OTEL_TRACE in self.config.sinks:
|
||||
span_exporter = OTLPSpanExporter()
|
||||
span_processor = BatchSpanProcessor(span_exporter)
|
||||
trace.get_tracer_provider().add_span_processor(span_processor)
|
||||
|
||||
if TelemetrySink.OTEL_METRIC in self.config.sinks:
|
||||
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
|
||||
metric_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
|
||||
metrics.set_meter_provider(metric_provider)
|
||||
|
||||
if TelemetrySink.SQLITE in self.config.sinks:
|
||||
trace.get_tracer_provider().add_span_processor(SQLiteSpanProcessor(self.config.sqlite_db_path))
|
||||
if TelemetrySink.CONSOLE in self.config.sinks:
|
||||
|
|
|
@ -64,6 +64,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
|
@ -54,6 +54,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
|
@ -73,6 +73,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
|
@ -193,6 +193,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
post_training:
|
||||
- provider_id: huggingface
|
||||
provider_type: inline::huggingface
|
||||
|
|
|
@ -53,6 +53,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/vllm-gpu}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
|
@ -50,6 +50,7 @@ providers:
|
|||
service_name: "${env.OTEL_SERVICE_NAME:=\u200B}"
|
||||
sinks: ${env.TELEMETRY_SINKS:=console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/trace_store.db
|
||||
otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
|
|
121
scripts/setup_telemetry.sh
Executable file
121
scripts/setup_telemetry.sh
Executable file
|
@ -0,0 +1,121 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# Telemetry Setup Script for Llama Stack
|
||||
# This script sets up Jaeger, OpenTelemetry Collector, Prometheus, and Grafana using Podman
|
||||
# For whoever is interested in testing the telemetry stack, you can run this script to set up the stack.
|
||||
# export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
|
||||
# export TELEMETRY_SINKS=otel_trace,otel_metric
|
||||
# export OTEL_SERVICE_NAME=my-llama-app
|
||||
# Then run the distro server
|
||||
|
||||
set -Eeuo pipefail
|
||||
|
||||
CONTAINER_RUNTIME=${CONTAINER_RUNTIME:-docker}
|
||||
|
||||
echo "🚀 Setting up telemetry stack for Llama Stack using Podman..."
|
||||
|
||||
if ! command -v "$CONTAINER_RUNTIME" &> /dev/null; then
|
||||
echo "🚨 $CONTAINER_RUNTIME could not be found"
|
||||
echo "Docker or Podman is required. Install Docker: https://docs.docker.com/get-docker/ or Podman: https://podman.io/getting-started/installation"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create a network for the services
|
||||
echo "📡 Creating $CONTAINER_RUNTIME network..."
|
||||
$CONTAINER_RUNTIME network create llama-telemetry 2>/dev/null || echo "Network already exists"
|
||||
|
||||
# Stop and remove existing containers
|
||||
echo "🧹 Cleaning up existing containers..."
|
||||
$CONTAINER_RUNTIME stop jaeger otel-collector prometheus grafana 2>/dev/null || true
|
||||
$CONTAINER_RUNTIME rm jaeger otel-collector prometheus grafana 2>/dev/null || true
|
||||
|
||||
# Start Jaeger
|
||||
echo "🔍 Starting Jaeger..."
|
||||
$CONTAINER_RUNTIME run -d --name jaeger \
|
||||
--network llama-telemetry \
|
||||
-e COLLECTOR_ZIPKIN_HOST_PORT=:9411 \
|
||||
-p 16686:16686 \
|
||||
-p 14250:14250 \
|
||||
-p 9411:9411 \
|
||||
docker.io/jaegertracing/all-in-one:latest
|
||||
|
||||
# Start OpenTelemetry Collector
|
||||
echo "📊 Starting OpenTelemetry Collector..."
|
||||
$CONTAINER_RUNTIME run -d --name otel-collector \
|
||||
--network llama-telemetry \
|
||||
-p 4318:4318 \
|
||||
-p 4317:4317 \
|
||||
-p 9464:9464 \
|
||||
-p 13133:13133 \
|
||||
-v $(pwd)/otel-collector-config.yaml:/etc/otel-collector-config.yaml:Z \
|
||||
docker.io/otel/opentelemetry-collector-contrib:latest \
|
||||
--config /etc/otel-collector-config.yaml
|
||||
|
||||
# Start Prometheus
|
||||
echo "📈 Starting Prometheus..."
|
||||
$CONTAINER_RUNTIME run -d --name prometheus \
|
||||
--network llama-telemetry \
|
||||
-p 9090:9090 \
|
||||
-v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml:Z \
|
||||
docker.io/prom/prometheus:latest \
|
||||
--config.file=/etc/prometheus/prometheus.yml \
|
||||
--storage.tsdb.path=/prometheus \
|
||||
--web.console.libraries=/etc/prometheus/console_libraries \
|
||||
--web.console.templates=/etc/prometheus/consoles \
|
||||
--storage.tsdb.retention.time=200h \
|
||||
--web.enable-lifecycle
|
||||
|
||||
# Start Grafana
|
||||
echo "📊 Starting Grafana..."
|
||||
$CONTAINER_RUNTIME run -d --name grafana \
|
||||
--network llama-telemetry \
|
||||
-p 3000:3000 \
|
||||
-e GF_SECURITY_ADMIN_PASSWORD=admin \
|
||||
-e GF_USERS_ALLOW_SIGN_UP=false \
|
||||
docker.io/grafana/grafana:latest
|
||||
|
||||
# Wait for services to start
|
||||
echo "⏳ Waiting for services to start..."
|
||||
sleep 10
|
||||
|
||||
# Check if services are running
|
||||
echo "🔍 Checking service status..."
|
||||
$CONTAINER_RUNTIME ps --filter "name=jaeger|otel-collector|prometheus|grafana" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
|
||||
|
||||
echo ""
|
||||
echo "✅ Telemetry stack is ready!"
|
||||
echo ""
|
||||
echo "🌐 Service URLs:"
|
||||
echo " Jaeger UI: http://localhost:16686"
|
||||
echo " Prometheus: http://localhost:9090"
|
||||
echo " Grafana: http://localhost:3000 (admin/admin)"
|
||||
echo " OTEL Collector: http://localhost:4318 (OTLP endpoint)"
|
||||
echo ""
|
||||
echo "🔧 Environment variables for Llama Stack:"
|
||||
echo " export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318"
|
||||
echo " export TELEMETRY_SINKS=otel_trace,otel_metric"
|
||||
echo " export OTEL_SERVICE_NAME=my-llama-app"
|
||||
echo ""
|
||||
echo "📊 Next steps:"
|
||||
echo " 1. Set the environment variables above"
|
||||
echo " 2. Start your Llama Stack application"
|
||||
echo " 3. Make some inference calls to generate metrics"
|
||||
echo " 4. Check Jaeger for traces: http://localhost:16686"
|
||||
echo " 5. Check Prometheus for metrics: http://localhost:9090"
|
||||
echo " 6. Set up Grafana dashboards: http://localhost:3000"
|
||||
echo ""
|
||||
echo "🔍 To test the setup, run:"
|
||||
echo " curl -X POST http://localhost:5000/v1/inference/chat/completions \\"
|
||||
echo " -H 'Content-Type: application/json' \\"
|
||||
echo " -d '{\"model_id\": \"your-model\", \"messages\": [{\"role\": \"user\", \"content\": \"Hello\"}]}'"
|
||||
echo ""
|
||||
echo "🧹 To clean up when done:"
|
||||
echo " $CONTAINER_RUNTIME stop jaeger otel-collector prometheus grafana"
|
||||
echo " $CONTAINER_RUNTIME rm jaeger otel-collector prometheus grafana"
|
||||
echo " $CONTAINER_RUNTIME network rm llama-telemetry"
|
Loading…
Add table
Add a link
Reference in a new issue