mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-09 19:58:29 +00:00
add MetricResponseMixin to chat completion response types
This commit is contained in:
parent
71cae67d7b
commit
183e9a08cc
4 changed files with 142 additions and 116 deletions
154
docs/_static/llama-stack-spec.html
vendored
154
docs/_static/llama-stack-spec.html
vendored
|
@ -3106,6 +3106,12 @@
|
||||||
"ChatCompletionResponse": {
|
"ChatCompletionResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"metrics": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/MetricEvent"
|
||||||
|
}
|
||||||
|
},
|
||||||
"completion_message": {
|
"completion_message": {
|
||||||
"$ref": "#/components/schemas/CompletionMessage",
|
"$ref": "#/components/schemas/CompletionMessage",
|
||||||
"description": "The complete response message"
|
"description": "The complete response message"
|
||||||
|
@ -3124,6 +3130,77 @@
|
||||||
],
|
],
|
||||||
"description": "Response from a chat completion request."
|
"description": "Response from a chat completion request."
|
||||||
},
|
},
|
||||||
|
"MetricEvent": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"trace_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"span_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"timestamp": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date-time"
|
||||||
|
},
|
||||||
|
"attributes": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "metric",
|
||||||
|
"default": "metric"
|
||||||
|
},
|
||||||
|
"metric": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"trace_id",
|
||||||
|
"span_id",
|
||||||
|
"timestamp",
|
||||||
|
"type",
|
||||||
|
"metric",
|
||||||
|
"value",
|
||||||
|
"unit"
|
||||||
|
]
|
||||||
|
},
|
||||||
"TokenLogProbs": {
|
"TokenLogProbs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -3388,6 +3465,12 @@
|
||||||
"ChatCompletionResponseStreamChunk": {
|
"ChatCompletionResponseStreamChunk": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"metrics": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/MetricEvent"
|
||||||
|
}
|
||||||
|
},
|
||||||
"event": {
|
"event": {
|
||||||
"$ref": "#/components/schemas/ChatCompletionResponseEvent",
|
"$ref": "#/components/schemas/ChatCompletionResponseEvent",
|
||||||
"description": "The event containing the new content"
|
"description": "The event containing the new content"
|
||||||
|
@ -6374,77 +6457,6 @@
|
||||||
"critical"
|
"critical"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"MetricEvent": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"trace_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"span_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"timestamp": {
|
|
||||||
"type": "string",
|
|
||||||
"format": "date-time"
|
|
||||||
},
|
|
||||||
"attributes": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "array"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "metric",
|
|
||||||
"default": "metric"
|
|
||||||
},
|
|
||||||
"metric": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"value": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"unit": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"additionalProperties": false,
|
|
||||||
"required": [
|
|
||||||
"trace_id",
|
|
||||||
"span_id",
|
|
||||||
"timestamp",
|
|
||||||
"type",
|
|
||||||
"metric",
|
|
||||||
"value",
|
|
||||||
"unit"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"SpanEndPayload": {
|
"SpanEndPayload": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
90
docs/_static/llama-stack-spec.yaml
vendored
90
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1925,6 +1925,10 @@ components:
|
||||||
ChatCompletionResponse:
|
ChatCompletionResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
metrics:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/MetricEvent'
|
||||||
completion_message:
|
completion_message:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/CompletionMessage'
|
||||||
description: The complete response message
|
description: The complete response message
|
||||||
|
@ -1938,6 +1942,47 @@ components:
|
||||||
required:
|
required:
|
||||||
- completion_message
|
- completion_message
|
||||||
description: Response from a chat completion request.
|
description: Response from a chat completion request.
|
||||||
|
MetricEvent:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
trace_id:
|
||||||
|
type: string
|
||||||
|
span_id:
|
||||||
|
type: string
|
||||||
|
timestamp:
|
||||||
|
type: string
|
||||||
|
format: date-time
|
||||||
|
attributes:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: metric
|
||||||
|
default: metric
|
||||||
|
metric:
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
oneOf:
|
||||||
|
- type: integer
|
||||||
|
- type: number
|
||||||
|
unit:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- trace_id
|
||||||
|
- span_id
|
||||||
|
- timestamp
|
||||||
|
- type
|
||||||
|
- metric
|
||||||
|
- value
|
||||||
|
- unit
|
||||||
TokenLogProbs:
|
TokenLogProbs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -2173,6 +2218,10 @@ components:
|
||||||
ChatCompletionResponseStreamChunk:
|
ChatCompletionResponseStreamChunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
metrics:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/MetricEvent'
|
||||||
event:
|
event:
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
||||||
description: The event containing the new content
|
description: The event containing the new content
|
||||||
|
@ -4070,47 +4119,6 @@ components:
|
||||||
- warn
|
- warn
|
||||||
- error
|
- error
|
||||||
- critical
|
- critical
|
||||||
MetricEvent:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
trace_id:
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
type: string
|
|
||||||
format: date-time
|
|
||||||
attributes:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: 'null'
|
|
||||||
- type: boolean
|
|
||||||
- type: number
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
- type: object
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
metric:
|
|
||||||
type: string
|
|
||||||
value:
|
|
||||||
oneOf:
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
unit:
|
|
||||||
type: string
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- type
|
|
||||||
- metric
|
|
||||||
- value
|
|
||||||
- unit
|
|
||||||
SpanEndPayload:
|
SpanEndPayload:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -13,8 +13,8 @@ from typing import (
|
||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Protocol,
|
Protocol,
|
||||||
runtime_checkable,
|
|
||||||
Union,
|
Union,
|
||||||
|
runtime_checkable,
|
||||||
)
|
)
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import (
|
from llama_models.llama3.api.datatypes import (
|
||||||
|
@ -31,6 +31,7 @@ from typing_extensions import Annotated
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
||||||
from llama_stack.apis.models import Model
|
from llama_stack.apis.models import Model
|
||||||
|
from llama_stack.apis.telemetry.telemetry import MetricResponseMixin
|
||||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||||
|
|
||||||
|
|
||||||
|
@ -357,7 +358,7 @@ class ChatCompletionRequest(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ChatCompletionResponseStreamChunk(BaseModel):
|
class ChatCompletionResponseStreamChunk(MetricResponseMixin, BaseModel):
|
||||||
"""A chunk of a streamed chat completion response.
|
"""A chunk of a streamed chat completion response.
|
||||||
|
|
||||||
:param event: The event containing the new content
|
:param event: The event containing the new content
|
||||||
|
@ -367,7 +368,7 @@ class ChatCompletionResponseStreamChunk(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ChatCompletionResponse(BaseModel):
|
class ChatCompletionResponse(MetricResponseMixin, BaseModel):
|
||||||
"""Response from a chat completion request.
|
"""Response from a chat completion request.
|
||||||
|
|
||||||
:param completion_message: The complete response message
|
:param completion_message: The complete response message
|
||||||
|
|
|
@ -13,8 +13,8 @@ from typing import (
|
||||||
Literal,
|
Literal,
|
||||||
Optional,
|
Optional,
|
||||||
Protocol,
|
Protocol,
|
||||||
runtime_checkable,
|
|
||||||
Union,
|
Union,
|
||||||
|
runtime_checkable,
|
||||||
)
|
)
|
||||||
|
|
||||||
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
|
from llama_models.schema_utils import json_schema_type, register_schema, webmethod
|
||||||
|
@ -94,6 +94,11 @@ class MetricEvent(EventCommon):
|
||||||
unit: str
|
unit: str
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class MetricResponseMixin:
|
||||||
|
metrics: Optional[List[MetricEvent]] = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class StructuredLogType(Enum):
|
class StructuredLogType(Enum):
|
||||||
SPAN_START = "span_start"
|
SPAN_START = "span_start"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue