From b62f7e82bd2e5f536bfecd9ef75e5611c8906ccb Mon Sep 17 00:00:00 2001
From: Dinesh Yeduguru
Date: Tue, 11 Feb 2025 14:53:08 -0800
Subject: [PATCH] address feedback

---
 llama_stack/apis/telemetry/telemetry.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index f57fe77ce..6a62e274d 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -94,8 +94,27 @@ class MetricEvent(EventCommon):
     unit: str
 
 
-@json_schema_type
-class MetricResponseMixin:
+# This is a short-term solution to allow the inference API to return metrics.
+# The ideal approach is for all response types to include metrics, with every
+# metric event logged to the telemetry API included in the response.
+# To do this, we will need to augment all response types with a metrics field.
+# We have hit a blocker with the Stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment the response types that have a data
+# field in them, like so:
+# class ListModelsResponse(BaseModel):
+#     metrics: Optional[List[MetricEvent]] = None
+#     data: List[Models]
+#     ...
+# the client SDK would need to access the data via a .data field, which is not
+# ergonomic. The Stainless SDK does support unwrapping the response type, but it
+# requires that the response type have only a single field.
+
+# We will need a way for the client SDK to signal that metrics are needed;
+# if they are, the client SDK has to return the full response type
+# without unwrapping it.
+
+
+class MetricResponseMixin(BaseModel):
     metrics: Optional[List[MetricEvent]] = None
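
For context, a minimal sketch of how the mixin could be used: a response type inherits from MetricResponseMixin so the optional metrics field rides along with the existing payload instead of requiring a wrapper type. The CompletionResponse name and its fields below are hypothetical stand-ins, not types from this patch, and MetricEvent is simplified here so the snippet is self-contained.

    from typing import List, Optional

    from pydantic import BaseModel


    class MetricEvent(BaseModel):
        # Simplified stand-in for the MetricEvent defined in telemetry.py.
        metric: str
        value: float
        unit: str


    class MetricResponseMixin(BaseModel):
        # Metrics recorded while serving a request, inlined with the response.
        metrics: Optional[List[MetricEvent]] = None


    class CompletionResponse(MetricResponseMixin):
        # Hypothetical response type: inheriting the mixin adds the optional
        # `metrics` field without changing the existing response fields.
        content: str


    response = CompletionResponse(
        content="Hello!",
        metrics=[MetricEvent(metric="prompt_tokens", value=12, unit="tokens")],
    )
    print(response.metrics)

Because the mixin keeps metrics optional, existing callers that ignore the field are unaffected; the open question described in the patch comments is how a client signals that it wants the full, unwrapped response so the metrics survive SDK unwrapping.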