diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 151ac1451..75e0c4dfa 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -3106,6 +3106,12 @@
"ChatCompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"completion_message": {
"$ref": "#/components/schemas/CompletionMessage",
"description": "The complete response message"
@@ -3124,6 +3130,77 @@
],
"description": "Response from a chat completion request."
},
+ "MetricEvent": {
+ "type": "object",
+ "properties": {
+ "trace_id": {
+ "type": "string"
+ },
+ "span_id": {
+ "type": "string"
+ },
+ "timestamp": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "attributes": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "metric",
+ "default": "metric"
+ },
+ "metric": {
+ "type": "string"
+ },
+ "value": {
+ "oneOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ },
+ "unit": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "trace_id",
+ "span_id",
+ "timestamp",
+ "type",
+ "metric",
+ "value",
+ "unit"
+ ]
+ },
"TokenLogProbs": {
"type": "object",
"properties": {
@@ -3388,6 +3465,12 @@
"ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"event": {
"$ref": "#/components/schemas/ChatCompletionResponseEvent",
"description": "The event containing the new content"
@@ -6374,77 +6457,6 @@
"critical"
]
},
- "MetricEvent": {
- "type": "object",
- "properties": {
- "trace_id": {
- "type": "string"
- },
- "span_id": {
- "type": "string"
- },
- "timestamp": {
- "type": "string",
- "format": "date-time"
- },
- "attributes": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- },
- "type": {
- "type": "string",
- "const": "metric",
- "default": "metric"
- },
- "metric": {
- "type": "string"
- },
- "value": {
- "oneOf": [
- {
- "type": "integer"
- },
- {
- "type": "number"
- }
- ]
- },
- "unit": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "trace_id",
- "span_id",
- "timestamp",
- "type",
- "metric",
- "value",
- "unit"
- ]
- },
"SpanEndPayload": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 37fba4541..c60a002e2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1925,6 +1925,10 @@ components:
ChatCompletionResponse:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricEvent'
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
@@ -1938,6 +1942,47 @@ components:
required:
- completion_message
description: Response from a chat completion request.
+ MetricEvent:
+ type: object
+ properties:
+ trace_id:
+ type: string
+ span_id:
+ type: string
+ timestamp:
+ type: string
+ format: date-time
+ attributes:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type:
+ type: string
+ const: metric
+ default: metric
+ metric:
+ type: string
+ value:
+ oneOf:
+ - type: integer
+ - type: number
+ unit:
+ type: string
+ additionalProperties: false
+ required:
+ - trace_id
+ - span_id
+ - timestamp
+ - type
+ - metric
+ - value
+ - unit
TokenLogProbs:
type: object
properties:
@@ -2173,6 +2218,10 @@ components:
ChatCompletionResponseStreamChunk:
type: object
properties:
+ metrics:
+ type: array
+ items:
+ $ref: '#/components/schemas/MetricEvent'
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
description: The event containing the new content
@@ -4070,47 +4119,6 @@ components:
- warn
- error
- critical
- MetricEvent:
- type: object
- properties:
- trace_id:
- type: string
- span_id:
- type: string
- timestamp:
- type: string
- format: date-time
- attributes:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type:
- type: string
- const: metric
- default: metric
- metric:
- type: string
- value:
- oneOf:
- - type: integer
- - type: number
- unit:
- type: string
- additionalProperties: false
- required:
- - trace_id
- - span_id
- - timestamp
- - type
- - metric
- - value
- - unit
SpanEndPayload:
type: object
properties:
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 4e095e831..9fccd3911 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -13,8 +13,8 @@ from typing import (
Literal,
Optional,
Protocol,
- runtime_checkable,
Union,
+ runtime_checkable,
)
from llama_models.llama3.api.datatypes import (
@@ -31,6 +31,7 @@ from typing_extensions import Annotated
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
from llama_stack.apis.models import Model
+from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -357,7 +358,7 @@ class ChatCompletionRequest(BaseModel):
@json_schema_type
-class ChatCompletionResponseStreamChunk(BaseModel):
+class ChatCompletionResponseStreamChunk(MetricResponseMixin, BaseModel):
"""A chunk of a streamed chat completion response.
:param event: The event containing the new content
@@ -367,7 +368,7 @@ class ChatCompletionResponseStreamChunk(BaseModel):
@json_schema_type
-class ChatCompletionResponse(BaseModel):
+class ChatCompletionResponse(MetricResponseMixin, BaseModel):
"""Response from a chat completion request.
:param completion_message: The complete response message
diff --git a/llama_stack/apis/telemetry/telemetry.py b/llama_stack/apis/telemetry/telemetry.py
index 324064007..6a62e274d 100644
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@@ -13,8 +13,8 @@ from typing import (
Literal,
Optional,
Protocol,
- runtime_checkable,
Union,
+ runtime_checkable,
)
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
@@ -94,6 +94,30 @@ class MetricEvent(EventCommon):
unit: str
+# This is a short-term solution to allow the inference API to return metrics.
+# Ideally, every response type would include a metrics field, and all metric
+# events logged to the telemetry API would be included with the response.
+# To do this, we would need to augment all response types with a metrics field.
+# We have hit a blocker in the Stainless SDK that prevents us from doing this.
+# The blocker is that if we were to augment response types that have a data
+# field in them, like so:
+#
+#     class ListModelsResponse(BaseModel):
+#         metrics: Optional[List[MetricEvent]] = None
+#         data: List[Models]
+#         ...
+#
+# the client SDK would need to access the data via a .data field, which is not
+# ergonomic. The Stainless SDK does support unwrapping the response type, but
+# it requires that the response type have only a single field.
+
+# We will need a way in the client SDK to signal that metrics are needed and,
+# if they are, to return the full response type without unwrapping it.
+
+
+class MetricResponseMixin(BaseModel):
+ metrics: Optional[List[MetricEvent]] = None
+
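+# Illustrative usage (hypothetical client code): ChatCompletionResponse in
+# inference.py mixes this in, so a caller can read metrics directly off the
+# response alongside its payload, e.g.
+#
+#     response = await client.inference.chat_completion(...)
+#     for metric in response.metrics or []:
+#         print(metric.metric, metric.value, metric.unit)
+#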
+
@json_schema_type
class StructuredLogType(Enum):
SPAN_START = "span_start"