diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 709360ede..1a467f6f3 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4630,8 +4630,6 @@
},
"additionalProperties": false,
"required": [
- "trace_id",
- "span_id",
"timestamp",
"type",
"metric",
@@ -8450,8 +8448,6 @@
},
"additionalProperties": false,
"required": [
- "trace_id",
- "span_id",
"timestamp",
"type",
"payload"
@@ -8524,8 +8520,6 @@
},
"additionalProperties": false,
"required": [
- "trace_id",
- "span_id",
"timestamp",
"type",
"message",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 4c00fbe63..f118d5df3 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -3149,8 +3149,6 @@ components:
type: string
additionalProperties: false
required:
- - trace_id
- - span_id
- timestamp
- type
- metric
@@ -5760,8 +5758,6 @@ components:
$ref: '#/components/schemas/StructuredLogPayload'
additionalProperties: false
required:
- - trace_id
- - span_id
- timestamp
- type
- payload
@@ -5804,8 +5800,6 @@ components:
$ref: '#/components/schemas/LogSeverity'
additionalProperties: false
required:
- - trace_id
- - span_id
- timestamp
- type
- message
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index fe75677e7..9b9061025 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -75,9 +75,9 @@ class LogSeverity(Enum):
class EventCommon(BaseModel):
- trace_id: str
- span_id: str
- timestamp: datetime
+ trace_id: Optional[str] = None
+ span_id: Optional[str] = None
+ timestamp: Optional[datetime] = None
attributes: Optional[Dict[str, Primitive]] = Field(default_factory=dict)
@@ -93,7 +93,14 @@ class MetricEvent(EventCommon):
type: Literal[EventType.METRIC.value] = EventType.METRIC.value
metric: str # this would be an enum
value: Union[int, float]
- unit: str
+ unit: Optional[str] = None
+
+
+@json_schema_type
+class MetricInResponse(BaseModel):
+ metric: str
+ value: Union[int, float]
+ unit: Optional[str] = None
# This is a short term solution to allow inference API to return metrics
@@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
class MetricResponseMixin(BaseModel):
- metrics: Optional[List[MetricEvent]] = None
+ metrics: Optional[List[MetricInResponse]] = None
@json_schema_type
diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py
index 68b8e55cb..b23822694 100644
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@@ -48,7 +48,7 @@ from llama_stack.apis.scoring import (
ScoringFnParams,
)
from llama_stack.apis.shields import Shield
-from llama_stack.apis.telemetry import MetricEvent, Telemetry
+from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
from llama_stack.apis.tools import (
RAGDocument,
RAGQueryConfig,
@@ -206,12 +206,12 @@ class InferenceRouter(Inference):
completion_tokens: int,
total_tokens: int,
model: Model,
- ) -> List[MetricEvent]:
+ ) -> List[MetricInResponse]:
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
if self.telemetry:
for metric in metrics:
await self.telemetry.log_event(metric)
- return metrics
+ return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
async def _count_tokens(
self,
diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
index 4cdb420b2..12faf80b3 100644
--- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
@@ -153,20 +153,20 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
else:
print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
- def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
+ def _get_or_create_counter(self, name: str, unit: Optional[str] = None) -> metrics.Counter:
if name not in _GLOBAL_STORAGE["counters"]:
_GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
name=name,
- unit=unit,
+ unit=unit or "",
description=f"Counter for {name}",
)
return _GLOBAL_STORAGE["counters"][name]
- def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
+ def _get_or_create_gauge(self, name: str, unit: Optional[str] = None) -> metrics.ObservableGauge:
if name not in _GLOBAL_STORAGE["gauges"]:
_GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
name=name,
- unit=unit,
+ unit=unit or "",
description=f"Gauge for {name}",
)
return _GLOBAL_STORAGE["gauges"][name]
@@ -181,11 +181,11 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
up_down_counter.add(event.value, attributes=event.attributes)
- def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
+ def _get_or_create_up_down_counter(self, name: str, unit: Optional[str] = None) -> metrics.UpDownCounter:
if name not in _GLOBAL_STORAGE["up_down_counters"]:
_GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
name=name,
- unit=unit,
+ unit=unit or "",
description=f"UpDownCounter for {name}",
)
return _GLOBAL_STORAGE["up_down_counters"][name]