From b62f7e82bd2e5f536bfecd9ef75e5611c8906ccb Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Tue, 11 Feb 2025 14:53:08 -0800
Subject: [PATCH] address feedback

---
 llama_stack/apis/telemetry/telemetry.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index f57fe77ce..6a62e274d 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -94,8 +94,27 @@ class MetricEvent(EventCommon):
     unit: str
 
 
-@json_schema_type
-class MetricResponseMixin:
+# This is a short-term solution to allow the inference API to return metrics.
+# The ideal approach is for all response types to include metrics, with every
+# metric event logged to the telemetry API included in the response.
+# To do this, we will need to augment all response types with a metrics field.
+# We have hit a blocker with the Stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment the response types that have a data
+# field in them, like so:
+# class ListModelsResponse(BaseModel):
+#     metrics: Optional[List[MetricEvent]] = None
+#     data: List[Models]
+#     ...
+# the client SDK would need to access the data via a .data field, which is not
+# ergonomic. The Stainless SDK does support unwrapping the response type, but it
+# requires that the response type have only a single field.
+
+# We will need a way for the client SDK to signal that metrics are needed;
+# if they are, the client SDK has to return the full response type
+# without unwrapping it.
+
+
+class MetricResponseMixin(BaseModel):
     metrics: Optional[List[MetricEvent]] = None
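
For context, a minimal sketch of how the mixin could be used: a response type inherits from MetricResponseMixin so the optional metrics field rides along with the existing payload instead of requiring a wrapper type. The CompletionResponse name and its fields below are hypothetical stand-ins, not types from this patch, and MetricEvent is simplified here so the snippet is self-contained.

    from typing import List, Optional

    from pydantic import BaseModel


    class MetricEvent(BaseModel):
        # Simplified stand-in for the MetricEvent defined in telemetry.py.
        metric: str
        value: float
        unit: str


    class MetricResponseMixin(BaseModel):
        # Metrics recorded while serving a request, inlined with the response.
        metrics: Optional[List[MetricEvent]] = None


    class CompletionResponse(MetricResponseMixin):
        # Hypothetical response type: inheriting the mixin adds the optional
        # `metrics` field without changing the existing response fields.
        content: str


    response = CompletionResponse(
        content="Hello!",
        metrics=[MetricEvent(metric="prompt_tokens", value=12, unit="tokens")],
    )
    print(response.metrics)

Because the mixin keeps metrics optional, existing callers that ignore the field are unaffected; the open question described in the patch comments is how a client signals that it wants the full, unwrapped response so the metrics survive SDK unwrapping.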