Merge branch 'main' into delete_unused_imports

This commit is contained in:
Xi Yan 2025-03-12 16:23:56 -07:00
commit e1a45ffba4
7 changed files with 154 additions and 82 deletions

View file

@ -1,6 +1,8 @@
name: Unit Tests name: Unit Tests
on: on:
push:
branches: [ main ]
pull_request: pull_request:
branches: [ main ] branches: [ main ]
workflow_dispatch: workflow_dispatch:

View file

@ -4,6 +4,7 @@
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/)
[![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE) [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack)
![Unit](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)

View file

@ -4549,7 +4549,7 @@
"metrics": { "metrics": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/MetricEvent" "$ref": "#/components/schemas/MetricInResponse"
} }
}, },
"completion_message": { "completion_message": {
@ -4571,46 +4571,9 @@
"title": "ChatCompletionResponse", "title": "ChatCompletionResponse",
"description": "Response from a chat completion request." "description": "Response from a chat completion request."
}, },
"MetricEvent": { "MetricInResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": { "metric": {
"type": "string" "type": "string"
}, },
@ -4630,15 +4593,10 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric", "metric",
"value", "value"
"unit"
], ],
"title": "MetricEvent" "title": "MetricInResponse"
}, },
"TokenLogProbs": { "TokenLogProbs": {
"type": "object", "type": "object",
@ -4715,6 +4673,12 @@
"CompletionResponse": { "CompletionResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"content": { "content": {
"type": "string", "type": "string",
"description": "The generated completion text" "description": "The generated completion text"
@ -4924,7 +4888,7 @@
"metrics": { "metrics": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/MetricEvent" "$ref": "#/components/schemas/MetricInResponse"
} }
}, },
"event": { "event": {
@ -5082,6 +5046,12 @@
"CompletionResponseStreamChunk": { "CompletionResponseStreamChunk": {
"type": "object", "type": "object",
"properties": { "properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"delta": { "delta": {
"type": "string", "type": "string",
"description": "New content generated since last chunk. This can be one or more tokens." "description": "New content generated since last chunk. This can be one or more tokens."
@ -8363,6 +8333,75 @@
], ],
"title": "LogSeverity" "title": "LogSeverity"
}, },
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": {
"type": "string"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
]
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent"
},
"SpanEndPayload": { "SpanEndPayload": {
"type": "object", "type": "object",
"properties": { "properties": {

View file

@ -3101,7 +3101,7 @@ components:
metrics: metrics:
type: array type: array
items: items:
$ref: '#/components/schemas/MetricEvent' $ref: '#/components/schemas/MetricInResponse'
completion_message: completion_message:
$ref: '#/components/schemas/CompletionMessage' $ref: '#/components/schemas/CompletionMessage'
description: The complete response message description: The complete response message
@ -3116,29 +3116,9 @@ components:
- completion_message - completion_message
title: ChatCompletionResponse title: ChatCompletionResponse
description: Response from a chat completion request. description: Response from a chat completion request.
MetricEvent: MetricInResponse:
type: object type: object
properties: properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric: metric:
type: string type: string
value: value:
@ -3149,14 +3129,9 @@ components:
type: string type: string
additionalProperties: false additionalProperties: false
required: required:
- trace_id
- span_id
- timestamp
- type
- metric - metric
- value - value
- unit title: MetricInResponse
title: MetricEvent
TokenLogProbs: TokenLogProbs:
type: object type: object
properties: properties:
@ -3213,6 +3188,10 @@ components:
CompletionResponse: CompletionResponse:
type: object type: object
properties: properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
content: content:
type: string type: string
description: The generated completion text description: The generated completion text
@ -3412,7 +3391,7 @@ components:
metrics: metrics:
type: array type: array
items: items:
$ref: '#/components/schemas/MetricEvent' $ref: '#/components/schemas/MetricInResponse'
event: event:
$ref: '#/components/schemas/ChatCompletionResponseEvent' $ref: '#/components/schemas/ChatCompletionResponseEvent'
description: The event containing the new content description: The event containing the new content
@ -3531,6 +3510,10 @@ components:
CompletionResponseStreamChunk: CompletionResponseStreamChunk:
type: object type: object
properties: properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
delta: delta:
type: string type: string
description: >- description: >-
@ -5703,6 +5686,47 @@ components:
- error - error
- critical - critical
title: LogSeverity title: LogSeverity
MetricEvent:
type: object
properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric:
type: string
value:
oneOf:
- type: integer
- type: number
unit:
type: string
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
SpanEndPayload: SpanEndPayload:
type: object type: object
properties: properties:

View file

@ -96,6 +96,13 @@ class MetricEvent(EventCommon):
unit: str unit: str
@json_schema_type
class MetricInResponse(BaseModel):
metric: str
value: Union[int, float]
unit: Optional[str] = None
# This is a short term solution to allow inference API to return metrics # This is a short term solution to allow inference API to return metrics
# The ideal way to do this is to have a way for all response types to include metrics # The ideal way to do this is to have a way for all response types to include metrics
# and all metric events logged to the telemetry API to be included with the response # and all metric events logged to the telemetry API to be included with the response
@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
class MetricResponseMixin(BaseModel): class MetricResponseMixin(BaseModel):
metrics: Optional[List[MetricEvent]] = None metrics: Optional[List[MetricInResponse]] = None
@json_schema_type @json_schema_type

View file

@ -48,7 +48,7 @@ from llama_stack.apis.scoring import (
ScoringFnParams, ScoringFnParams,
) )
from llama_stack.apis.shields import Shield from llama_stack.apis.shields import Shield
from llama_stack.apis.telemetry import MetricEvent, Telemetry from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
from llama_stack.apis.tools import ( from llama_stack.apis.tools import (
RAGDocument, RAGDocument,
RAGQueryConfig, RAGQueryConfig,
@ -206,12 +206,12 @@ class InferenceRouter(Inference):
completion_tokens: int, completion_tokens: int,
total_tokens: int, total_tokens: int,
model: Model, model: Model,
) -> List[MetricEvent]: ) -> List[MetricInResponse]:
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model) metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
if self.telemetry: if self.telemetry:
for metric in metrics: for metric in metrics:
await self.telemetry.log_event(metric) await self.telemetry.log_event(metric)
return metrics return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
async def _count_tokens( async def _count_tokens(
self, self,
@ -238,7 +238,6 @@ class InferenceRouter(Inference):
tool_config: Optional[ToolConfig] = None, tool_config: Optional[ToolConfig] = None,
) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
logger.debug( logger.debug(
"core",
f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}", f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
) )
if sampling_params is None: if sampling_params is None:

View file

@ -19,7 +19,7 @@ def preserve_contexts_async_generator(
and we need to preserve the context across the event loop boundary. and we need to preserve the context across the event loop boundary.
""" """
async def wrapper(): async def wrapper() -> AsyncGenerator[T, None]:
while True: while True:
try: try:
item = await gen.__anext__() item = await gen.__anext__()