feat: add metrics query API (#1394)

# What does this PR do?
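Adds a `POST /v1/telemetry/metrics/{metric_name}` endpoint to the Telemetry API for querying metrics, together with its request/response schemas. The `TelemetryAdapter` implementation is a stub for now and raises `NotImplementedError`.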

## Test Plan
llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml

---------

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
Dinesh Yeduguru 2025-05-07 10:11:26 -07:00 committed by GitHub
parent 6371bb1b33
commit fe5f5e530c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 387 additions and 0 deletions

View file

@ -3475,6 +3475,58 @@
}
}
},
"/v1/telemetry/metrics/{metric_name}": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Telemetry"
],
"description": "",
"parameters": [
{
"name": "metric_name",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/QueryMetricsRequest"
}
}
},
"required": true
}
}
},
"/v1/telemetry/spans": {
"post": {
"responses": {
@ -11270,6 +11322,143 @@
],
"title": "QueryChunksResponse"
},
"QueryMetricsRequest": {
"type": "object",
"properties": {
"start_time": {
"type": "integer"
},
"end_time": {
"type": "integer"
},
"granularity": {
"type": "string"
},
"query_type": {
"type": "string",
"enum": [
"range",
"instant"
],
"title": "MetricQueryType"
},
"label_matchers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "string"
},
"operator": {
"type": "string",
"enum": [
"=",
"!=",
"=~",
"!~"
],
"title": "MetricLabelOperator",
"default": "="
}
},
"additionalProperties": false,
"required": [
"name",
"value",
"operator"
],
"title": "MetricLabelMatcher"
}
}
},
"additionalProperties": false,
"required": [
"start_time",
"query_type"
],
"title": "QueryMetricsRequest"
},
"MetricDataPoint": {
"type": "object",
"properties": {
"timestamp": {
"type": "integer"
},
"value": {
"type": "number"
}
},
"additionalProperties": false,
"required": [
"timestamp",
"value"
],
"title": "MetricDataPoint"
},
"MetricLabel": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"name",
"value"
],
"title": "MetricLabel"
},
"MetricSeries": {
"type": "object",
"properties": {
"metric": {
"type": "string"
},
"labels": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricLabel"
}
},
"values": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricDataPoint"
}
}
},
"additionalProperties": false,
"required": [
"metric",
"labels",
"values"
],
"title": "MetricSeries"
},
"QueryMetricsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricSeries"
}
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "QueryMetricsResponse"
},
"QueryCondition": {
"type": "object",
"properties": {

View file

@ -2397,6 +2397,40 @@ paths:
schema:
$ref: '#/components/schemas/QueryChunksRequest'
required: true
/v1/telemetry/metrics/{metric_name}:
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Telemetry
description: ''
parameters:
- name: metric_name
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/QueryMetricsRequest'
required: true
/v1/telemetry/spans:
post:
responses:
@ -7762,6 +7796,104 @@ components:
- chunks
- scores
title: QueryChunksResponse
QueryMetricsRequest:
type: object
properties:
start_time:
type: integer
end_time:
type: integer
granularity:
type: string
query_type:
type: string
enum:
- range
- instant
title: MetricQueryType
label_matchers:
type: array
items:
type: object
properties:
name:
type: string
value:
type: string
operator:
type: string
enum:
- '='
- '!='
- =~
- '!~'
title: MetricLabelOperator
default: '='
additionalProperties: false
required:
- name
- value
- operator
title: MetricLabelMatcher
additionalProperties: false
required:
- start_time
- query_type
title: QueryMetricsRequest
MetricDataPoint:
type: object
properties:
timestamp:
type: integer
value:
type: number
additionalProperties: false
required:
- timestamp
- value
title: MetricDataPoint
MetricLabel:
type: object
properties:
name:
type: string
value:
type: string
additionalProperties: false
required:
- name
- value
title: MetricLabel
MetricSeries:
type: object
properties:
metric:
type: string
labels:
type: array
items:
$ref: '#/components/schemas/MetricLabel'
values:
type: array
items:
$ref: '#/components/schemas/MetricDataPoint'
additionalProperties: false
required:
- metric
- labels
- values
title: MetricSeries
QueryMetricsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/MetricSeries'
additionalProperties: false
required:
- data
title: QueryMetricsResponse
QueryCondition:
type: object
properties:

View file

@ -203,6 +203,47 @@ class QuerySpanTreeResponse(BaseModel):
data: dict[str, SpanWithStatus]
# Kinds of metric query a caller can request. The string values are the names
# that appear in the API schema for `query_type`.
# NOTE(review): presumably mirrors Prometheus-style "range" vs. "instant"
# queries -- confirm against the provider implementation.
# Documented with comments rather than a docstring so that schema generators
# that read enum docstrings do not start emitting a description.
class MetricQueryType(Enum):
    RANGE = "range"
    INSTANT = "instant"
# Comparison operators a label matcher can apply to a label's value.
# Documented with comments rather than a docstring so that schema generators
# that read enum docstrings do not start emitting a description.
class MetricLabelOperator(Enum):
    # Exact (in)equality on the label value.
    EQUALS = "="
    NOT_EQUALS = "!="
    # Regular-expression variants.
    # NOTE(review): the regex dialect is not specified here -- confirm with
    # whichever backend ends up evaluating these matchers.
    REGEX_MATCH = "=~"
    REGEX_NOT_MATCH = "!~"
# A single filter on a metric label: compare the label called `name` against
# `value` using `operator`. When the caller omits `operator`, it defaults to
# exact equality.
class MetricLabelMatcher(BaseModel):
    name: str
    value: str
    operator: MetricLabelOperator = MetricLabelOperator.EQUALS
# A name/value label attached to a metric series.
@json_schema_type
class MetricLabel(BaseModel):
    name: str
    value: str
# One sample of a metric: an integer timestamp paired with a float value.
# NOTE(review): the timestamp unit (seconds vs. milliseconds since epoch) is
# not stated anywhere in this file -- confirm and document it.
@json_schema_type
class MetricDataPoint(BaseModel):
    timestamp: int
    value: float
# A series of data points for one metric, together with the labels that
# identify the series.
@json_schema_type
class MetricSeries(BaseModel):
    # Name of the metric this series belongs to.
    metric: str
    labels: list[MetricLabel]
    values: list[MetricDataPoint]
# Response envelope for `query_metrics`: the matching series are returned
# under `data`.
class QueryMetricsResponse(BaseModel):
    data: list[MetricSeries]
@runtime_checkable
class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST")
@ -247,3 +288,14 @@ class Telemetry(Protocol):
dataset_id: str,
max_depth: int | None = None,
) -> None: ...
# Protocol stub for querying a metric by name.
#
# `metric_name` is taken from the route path; the remaining parameters are the
# query options:
#   start_time      required integer start of the query window
#   end_time        optional integer end of the window (None when omitted)
#   granularity     optional string, defaults to "1d"
#   query_type      MetricQueryType, defaults to RANGE
#   label_matchers  optional list of label filters (None when omitted)
# NOTE(review): the unit of start_time/end_time and the accepted granularity
# strings are not defined in this file -- confirm with the provider.
#
# Kept as comments rather than a docstring so the generated API description
# for this route is unchanged.
@webmethod(route="/telemetry/metrics/{metric_name}", method="POST")
async def query_metrics(
    self,
    metric_name: str,
    start_time: int,
    end_time: int | None = None,
    granularity: str | None = "1d",
    query_type: MetricQueryType = MetricQueryType.RANGE,
    label_matchers: list[MetricLabelMatcher] | None = None,
) -> QueryMetricsResponse:
    ...

View file

@ -20,7 +20,10 @@ from opentelemetry.semconv.resource import ResourceAttributes
from llama_stack.apis.telemetry import (
Event,
MetricEvent,
MetricLabelMatcher,
MetricQueryType,
QueryCondition,
QueryMetricsResponse,
QuerySpanTreeResponse,
QueryTracesResponse,
Span,
@ -123,6 +126,17 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
else:
raise ValueError(f"Unknown event type: {event}")
async def query_metrics(
    self,
    metric_name: str,
    start_time: int,
    end_time: int | None = None,
    granularity: str | None = "1d",
    query_type: MetricQueryType = MetricQueryType.RANGE,
    label_matchers: list[MetricLabelMatcher] | None = None,
) -> QueryMetricsResponse:
    """Placeholder for metric queries; this adapter does not support them yet.

    The signature matches the ``query_metrics`` method declared on the
    ``Telemetry`` protocol. None of the arguments are inspected.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("Querying metrics is not implemented")
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
# Use global storage instead of instance storage