diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 2a9f4b6f7..c0e3061f9 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -898,6 +898,49 @@
"schema": {
"type": "string"
}
+ },
+ {
+ "name": "start_time",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "end_time",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "step",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "query_type",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "$ref": "#/components/schemas/MetricQueryType"
+ }
+ },
+ {
+ "name": "label_matchers",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricLabelMatcher"
+ }
+ }
}
]
}
@@ -3559,6 +3602,12 @@
"CompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"content": {
"type": "string",
"description": "The generated completion text"
@@ -3926,6 +3975,12 @@
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
@@ -5092,6 +5147,12 @@
"EmbeddingsResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Metric"
+ }
+ },
"embeddings": {
"type": "array",
"items": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index a2329e47a..ed41fec64 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -2236,6 +2236,10 @@ components:
CompletionResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
content:
type: string
description: The generated completion text
@@ -2554,6 +2558,10 @@ components:
CompletionResponseStreamChunk:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
delta:
type: string
description: >-
@@ -3341,6 +3349,10 @@ components:
EmbeddingsResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/Metric'
embeddings:
type: array
items:
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index 845e23e79..237b22d41 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -235,15 +235,23 @@ class MetricsMixin(BaseModel):
@json_schema_type
class MetricQueryType(Enum):
- RANGE = "range" # Returns data points over time range
- INSTANT = "instant" # Returns single data point
+ RANGE = "range"  # Returns data points over time range
+ INSTANT = "instant"  # Returns single data point
+
+
+@json_schema_type
+class MetricLabelOperator(Enum):  # Prometheus-style label matching operators
+ EQUALS = "="
+ NOT_EQUALS = "!="
+ REGEX_MATCH = "=~"
+ REGEX_NOT_MATCH = "!~"
@json_schema_type
class MetricLabelMatcher(BaseModel):
name: str
value: str
- operator: Literal["=", "!=", "=~", "!~"] = "=" # Prometheus-style operators
+ operator: MetricLabelOperator = MetricLabelOperator.EQUALS  # Prometheus-style operator; default is "="
@json_schema_type
@@ -313,9 +321,9 @@ class Telemetry(Protocol):
async def get_metrics(
self,
metric_name: str,
- start_time: int, # Unix timestamp in seconds
- end_time: Optional[int] = None, # Unix timestamp in seconds
- step: Optional[str] = "1m", # Prometheus-style duration: 1m, 5m, 1h, etc.
+ start_time: int,  # Unix timestamp in seconds
+ end_time: Optional[int] = None,  # Unix timestamp in seconds
+ step: Optional[str] = "1m",  # Prometheus-style duration: 1m, 5m, 1h, etc.
query_type: MetricQueryType = MetricQueryType.RANGE,
label_matchers: Optional[List[MetricLabelMatcher]] = None,
) -> GetMetricsResponse: ...