Merge branch 'main' into delete_unused_imports

2026-01-02 17:34:31 +00:00 · 2025-03-12 16:23:56 -07:00 · 2025-03-12 16:23:56 -07:00 · e1a45ffba4
commit e1a45ffba4
parent f8a76da2ab 99bbe0e70b
7 changed files with 154 additions and 82 deletions
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@ -1,6 +1,8 @@
 name: Unit Tests
 on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
--- a/README.md
+++ b/README.md
@ -4,6 +4,7 @@
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
 [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
 [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack)
 ![Unit](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)
 [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@ -4549,7 +4549,7 @@
                    "metrics": {
                        "type": "array",
                        "items": {
-                            "$ref": "#/components/schemas/MetricEvent"
+                            "$ref": "#/components/schemas/MetricInResponse"
                        }
                    },
                    "completion_message": {
@ -4571,46 +4571,9 @@
                "title": "ChatCompletionResponse",
                "description": "Response from a chat completion request."
            },
-            "MetricEvent": {
+            "MetricInResponse": {
                "type": "object",
                "properties": {
                    "trace_id": {
                        "type": "string"
                    },
                    "span_id": {
                        "type": "string"
                    },
                    "timestamp": {
                        "type": "string",
                        "format": "date-time"
                    },
                    "attributes": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "integer"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "null"
                                }
                            ]
                        }
                    },
                    "type": {
                        "type": "string",
                        "const": "metric",
                        "default": "metric"
                    },
                    "metric": {
                        "type": "string"
                    },
@ -4630,15 +4593,10 @@
                },
                "additionalProperties": false,
                "required": [
                    "trace_id",
                    "span_id",
                    "timestamp",
                    "type",
                    "metric",
-                    "value",
+                    "value"
                    "unit"
                ],
-                "title": "MetricEvent"
+                "title": "MetricInResponse"
            },
            "TokenLogProbs": {
                "type": "object",
@ -4715,6 +4673,12 @@
            "CompletionResponse": {
                "type": "object",
                "properties": {
                    "metrics": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/MetricInResponse"
                        }
                    },
                    "content": {
                        "type": "string",
                        "description": "The generated completion text"
@ -4924,7 +4888,7 @@
                    "metrics": {
                        "type": "array",
                        "items": {
-                            "$ref": "#/components/schemas/MetricEvent"
+                            "$ref": "#/components/schemas/MetricInResponse"
                        }
                    },
                    "event": {
@ -5082,6 +5046,12 @@
            "CompletionResponseStreamChunk": {
                "type": "object",
                "properties": {
                    "metrics": {
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/MetricInResponse"
                        }
                    },
                    "delta": {
                        "type": "string",
                        "description": "New content generated since last chunk. This can be one or more tokens."
@ -8363,6 +8333,75 @@
                ],
                "title": "LogSeverity"
            },
            "MetricEvent": {
                "type": "object",
                "properties": {
                    "trace_id": {
                        "type": "string"
                    },
                    "span_id": {
                        "type": "string"
                    },
                    "timestamp": {
                        "type": "string",
                        "format": "date-time"
                    },
                    "attributes": {
                        "type": "object",
                        "additionalProperties": {
                            "oneOf": [
                                {
                                    "type": "string"
                                },
                                {
                                    "type": "integer"
                                },
                                {
                                    "type": "number"
                                },
                                {
                                    "type": "boolean"
                                },
                                {
                                    "type": "null"
                                }
                            ]
                        }
                    },
                    "type": {
                        "type": "string",
                        "const": "metric",
                        "default": "metric"
                    },
                    "metric": {
                        "type": "string"
                    },
                    "value": {
                        "oneOf": [
                            {
                                "type": "integer"
                            },
                            {
                                "type": "number"
                            }
                        ]
                    },
                    "unit": {
                        "type": "string"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "trace_id",
                    "span_id",
                    "timestamp",
                    "type",
                    "metric",
                    "value",
                    "unit"
                ],
                "title": "MetricEvent"
            },
            "SpanEndPayload": {
                "type": "object",
                "properties": {
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -3101,7 +3101,7 @@ components:
        metrics:
          type: array
          items:
-            $ref: '#/components/schemas/MetricEvent'
+            $ref: '#/components/schemas/MetricInResponse'
        completion_message:
          $ref: '#/components/schemas/CompletionMessage'
          description: The complete response message
@ -3116,29 +3116,9 @@ components:
        - completion_message
      title: ChatCompletionResponse
      description: Response from a chat completion request.
-    MetricEvent:
+    MetricInResponse:
      type: object
      properties:
        trace_id:
          type: string
        span_id:
          type: string
        timestamp:
          type: string
          format: date-time
        attributes:
          type: object
          additionalProperties:
            oneOf:
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
        type:
          type: string
          const: metric
          default: metric
        metric:
          type: string
        value:
@ -3149,14 +3129,9 @@ components:
          type: string
      additionalProperties: false
      required:
        - trace_id
        - span_id
        - timestamp
        - type
        - metric
        - value
-        - unit
+      title: MetricInResponse
      title: MetricEvent
    TokenLogProbs:
      type: object
      properties:
@ -3213,6 +3188,10 @@ components:
    CompletionResponse:
      type: object
      properties:
        metrics:
          type: array
          items:
            $ref: '#/components/schemas/MetricInResponse'
        content:
          type: string
          description: The generated completion text
@ -3412,7 +3391,7 @@ components:
        metrics:
          type: array
          items:
-            $ref: '#/components/schemas/MetricEvent'
+            $ref: '#/components/schemas/MetricInResponse'
        event:
          $ref: '#/components/schemas/ChatCompletionResponseEvent'
          description: The event containing the new content
@ -3531,6 +3510,10 @@ components:
    CompletionResponseStreamChunk:
      type: object
      properties:
        metrics:
          type: array
          items:
            $ref: '#/components/schemas/MetricInResponse'
        delta:
          type: string
          description: >-
@ -5703,6 +5686,47 @@ components:
        - error
        - critical
      title: LogSeverity
    MetricEvent:
      type: object
      properties:
        trace_id:
          type: string
        span_id:
          type: string
        timestamp:
          type: string
          format: date-time
        attributes:
          type: object
          additionalProperties:
            oneOf:
              - type: string
              - type: integer
              - type: number
              - type: boolean
              - type: 'null'
        type:
          type: string
          const: metric
          default: metric
        metric:
          type: string
        value:
          oneOf:
            - type: integer
            - type: number
        unit:
          type: string
      additionalProperties: false
      required:
        - trace_id
        - span_id
        - timestamp
        - type
        - metric
        - value
        - unit
      title: MetricEvent
    SpanEndPayload:
      type: object
      properties:
--- a/llama_stack/apis/telemetry/telemetry.py
+++ b/llama_stack/apis/telemetry/telemetry.py
@ -96,6 +96,13 @@ class MetricEvent(EventCommon):
    unit: str
@json_schema_type
 class MetricInResponse(BaseModel):
    metric: str
    value: Union[int, float]
    unit: Optional[str] = None
 # This is a short term solution to allow inference API to return metrics
 # The ideal way to do this is to have a way for all response types to include metrics
 # and all metric events logged to the telemetry API to be included with the response
@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
 class MetricResponseMixin(BaseModel):
-    metrics: Optional[List[MetricEvent]] = None
+    metrics: Optional[List[MetricInResponse]] = None
@json_schema_type
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@ -48,7 +48,7 @@ from llama_stack.apis.scoring import (
    ScoringFnParams,
 )
 from llama_stack.apis.shields import Shield
-from llama_stack.apis.telemetry import MetricEvent, Telemetry
+from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.apis.tools import (
    RAGDocument,
    RAGQueryConfig,
@ -206,12 +206,12 @@ class InferenceRouter(Inference):
        completion_tokens: int,
        total_tokens: int,
        model: Model,
-    ) -> List[MetricEvent]:
+    ) -> List[MetricInResponse]:
        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
        if self.telemetry:
            for metric in metrics:
                await self.telemetry.log_event(metric)
-        return metrics
+        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
    async def _count_tokens(
        self,
@ -238,7 +238,6 @@ class InferenceRouter(Inference):
        tool_config: Optional[ToolConfig] = None,
    ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
        logger.debug(
            "core",
            f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
        )
        if sampling_params is None:
--- a/llama_stack/distribution/utils/context.py
+++ b/llama_stack/distribution/utils/context.py
@ -19,7 +19,7 @@ def preserve_contexts_async_generator(
    and we need to preserve the context across the event loop boundary.
    """
-    async def wrapper():
+    async def wrapper() -> AsyncGenerator[T, None]:
        while True:
            try:
                item = await gen.__anext__()