forked from phoenix-oss/llama-stack-mirror
feat: Add new compact MetricInResponse type (#1593)
# What does this PR do?

This change adds a compact `MetricInResponse` type for including metrics in responses, as opposed to the full `MetricEvent`, which is relevant only for internal logging purposes.

## Test Plan

```
LLAMA_STACK_CONFIG=~/.llama/distributions/fireworks/fireworks-run.yaml pytest -s -v agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct

llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml

curl --request POST \
  --url http://localhost:8321/v1/inference/chat-completion \
  --header 'content-type: application/json' \
  --data '{
    "model_id": "meta-llama/Llama-3.1-70B-Instruct",
    "messages": [
      {
        "role": "user",
        "content": {
          "type": "text",
          "text": "where do humans live"
        }
      }
    ],
    "stream": false
  }'

{
  "metrics": [
    { "metric": "prompt_tokens", "value": 10, "unit": null },
    { "metric": "completion_tokens", "value": 522, "unit": null },
    { "metric": "total_tokens", "value": 532, "unit": null }
  ],
  "completion_message": {
    "role": "assistant",
    "content": "Humans live in various parts of the world...............",
    "stop_reason": "out_of_tokens",
    "tool_calls": []
  },
  "logprobs": null
}
```
This commit is contained in:
parent
ad939c97c3
commit
99bbe0e70b
4 changed files with 150 additions and 80 deletions
133
docs/_static/llama-stack-spec.html
vendored
133
docs/_static/llama-stack-spec.html
vendored
|
@ -4549,7 +4549,7 @@
|
||||||
"metrics": {
|
"metrics": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/MetricEvent"
|
"$ref": "#/components/schemas/MetricInResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"completion_message": {
|
"completion_message": {
|
||||||
|
@ -4571,46 +4571,9 @@
|
||||||
"title": "ChatCompletionResponse",
|
"title": "ChatCompletionResponse",
|
||||||
"description": "Response from a chat completion request."
|
"description": "Response from a chat completion request."
|
||||||
},
|
},
|
||||||
"MetricEvent": {
|
"MetricInResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"trace_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"span_id": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"timestamp": {
|
|
||||||
"type": "string",
|
|
||||||
"format": "date-time"
|
|
||||||
},
|
|
||||||
"attributes": {
|
|
||||||
"type": "object",
|
|
||||||
"additionalProperties": {
|
|
||||||
"oneOf": [
|
|
||||||
{
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "boolean"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "null"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"type": {
|
|
||||||
"type": "string",
|
|
||||||
"const": "metric",
|
|
||||||
"default": "metric"
|
|
||||||
},
|
|
||||||
"metric": {
|
"metric": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
@ -4630,15 +4593,10 @@
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"trace_id",
|
|
||||||
"span_id",
|
|
||||||
"timestamp",
|
|
||||||
"type",
|
|
||||||
"metric",
|
"metric",
|
||||||
"value",
|
"value"
|
||||||
"unit"
|
|
||||||
],
|
],
|
||||||
"title": "MetricEvent"
|
"title": "MetricInResponse"
|
||||||
},
|
},
|
||||||
"TokenLogProbs": {
|
"TokenLogProbs": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -4715,6 +4673,12 @@
|
||||||
"CompletionResponse": {
|
"CompletionResponse": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"metrics": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/MetricInResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
"content": {
|
"content": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The generated completion text"
|
"description": "The generated completion text"
|
||||||
|
@ -4924,7 +4888,7 @@
|
||||||
"metrics": {
|
"metrics": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/components/schemas/MetricEvent"
|
"$ref": "#/components/schemas/MetricInResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"event": {
|
"event": {
|
||||||
|
@ -5082,6 +5046,12 @@
|
||||||
"CompletionResponseStreamChunk": {
|
"CompletionResponseStreamChunk": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
"metrics": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/components/schemas/MetricInResponse"
|
||||||
|
}
|
||||||
|
},
|
||||||
"delta": {
|
"delta": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "New content generated since last chunk. This can be one or more tokens."
|
"description": "New content generated since last chunk. This can be one or more tokens."
|
||||||
|
@ -8363,6 +8333,75 @@
|
||||||
],
|
],
|
||||||
"title": "LogSeverity"
|
"title": "LogSeverity"
|
||||||
},
|
},
|
||||||
|
"MetricEvent": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"trace_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"span_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"timestamp": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date-time"
|
||||||
|
},
|
||||||
|
"attributes": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "metric",
|
||||||
|
"default": "metric"
|
||||||
|
},
|
||||||
|
"metric": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"trace_id",
|
||||||
|
"span_id",
|
||||||
|
"timestamp",
|
||||||
|
"type",
|
||||||
|
"metric",
|
||||||
|
"value",
|
||||||
|
"unit"
|
||||||
|
],
|
||||||
|
"title": "MetricEvent"
|
||||||
|
},
|
||||||
"SpanEndPayload": {
|
"SpanEndPayload": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
82
docs/_static/llama-stack-spec.yaml
vendored
82
docs/_static/llama-stack-spec.yaml
vendored
|
@ -3101,7 +3101,7 @@ components:
|
||||||
metrics:
|
metrics:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/MetricEvent'
|
$ref: '#/components/schemas/MetricInResponse'
|
||||||
completion_message:
|
completion_message:
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
$ref: '#/components/schemas/CompletionMessage'
|
||||||
description: The complete response message
|
description: The complete response message
|
||||||
|
@ -3116,29 +3116,9 @@ components:
|
||||||
- completion_message
|
- completion_message
|
||||||
title: ChatCompletionResponse
|
title: ChatCompletionResponse
|
||||||
description: Response from a chat completion request.
|
description: Response from a chat completion request.
|
||||||
MetricEvent:
|
MetricInResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
trace_id:
|
|
||||||
type: string
|
|
||||||
span_id:
|
|
||||||
type: string
|
|
||||||
timestamp:
|
|
||||||
type: string
|
|
||||||
format: date-time
|
|
||||||
attributes:
|
|
||||||
type: object
|
|
||||||
additionalProperties:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: integer
|
|
||||||
- type: number
|
|
||||||
- type: boolean
|
|
||||||
- type: 'null'
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: metric
|
|
||||||
default: metric
|
|
||||||
metric:
|
metric:
|
||||||
type: string
|
type: string
|
||||||
value:
|
value:
|
||||||
|
@ -3149,14 +3129,9 @@ components:
|
||||||
type: string
|
type: string
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- trace_id
|
|
||||||
- span_id
|
|
||||||
- timestamp
|
|
||||||
- type
|
|
||||||
- metric
|
- metric
|
||||||
- value
|
- value
|
||||||
- unit
|
title: MetricInResponse
|
||||||
title: MetricEvent
|
|
||||||
TokenLogProbs:
|
TokenLogProbs:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -3213,6 +3188,10 @@ components:
|
||||||
CompletionResponse:
|
CompletionResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
metrics:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/MetricInResponse'
|
||||||
content:
|
content:
|
||||||
type: string
|
type: string
|
||||||
description: The generated completion text
|
description: The generated completion text
|
||||||
|
@ -3412,7 +3391,7 @@ components:
|
||||||
metrics:
|
metrics:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
$ref: '#/components/schemas/MetricEvent'
|
$ref: '#/components/schemas/MetricInResponse'
|
||||||
event:
|
event:
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
||||||
description: The event containing the new content
|
description: The event containing the new content
|
||||||
|
@ -3531,6 +3510,10 @@ components:
|
||||||
CompletionResponseStreamChunk:
|
CompletionResponseStreamChunk:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
metrics:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/MetricInResponse'
|
||||||
delta:
|
delta:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -5703,6 +5686,47 @@ components:
|
||||||
- error
|
- error
|
||||||
- critical
|
- critical
|
||||||
title: LogSeverity
|
title: LogSeverity
|
||||||
|
MetricEvent:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
trace_id:
|
||||||
|
type: string
|
||||||
|
span_id:
|
||||||
|
type: string
|
||||||
|
timestamp:
|
||||||
|
type: string
|
||||||
|
format: date-time
|
||||||
|
attributes:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: integer
|
||||||
|
- type: number
|
||||||
|
- type: boolean
|
||||||
|
- type: 'null'
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: metric
|
||||||
|
default: metric
|
||||||
|
metric:
|
||||||
|
type: string
|
||||||
|
value:
|
||||||
|
oneOf:
|
||||||
|
- type: integer
|
||||||
|
- type: number
|
||||||
|
unit:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- trace_id
|
||||||
|
- span_id
|
||||||
|
- timestamp
|
||||||
|
- type
|
||||||
|
- metric
|
||||||
|
- value
|
||||||
|
- unit
|
||||||
|
title: MetricEvent
|
||||||
SpanEndPayload:
|
SpanEndPayload:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -96,6 +96,13 @@ class MetricEvent(EventCommon):
|
||||||
unit: str
|
unit: str
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class MetricInResponse(BaseModel):
|
||||||
|
metric: str
|
||||||
|
value: Union[int, float]
|
||||||
|
unit: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# This is a short term solution to allow inference API to return metrics
|
# This is a short term solution to allow inference API to return metrics
|
||||||
# The ideal way to do this is to have a way for all response types to include metrics
|
# The ideal way to do this is to have a way for all response types to include metrics
|
||||||
# and all metric events logged to the telemetry API to be included with the response
|
# and all metric events logged to the telemetry API to be included with the response
|
||||||
|
@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
|
||||||
|
|
||||||
|
|
||||||
class MetricResponseMixin(BaseModel):
|
class MetricResponseMixin(BaseModel):
|
||||||
metrics: Optional[List[MetricEvent]] = None
|
metrics: Optional[List[MetricInResponse]] = None
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
|
@ -48,7 +48,7 @@ from llama_stack.apis.scoring import (
|
||||||
ScoringFnParams,
|
ScoringFnParams,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.shields import Shield
|
from llama_stack.apis.shields import Shield
|
||||||
from llama_stack.apis.telemetry import MetricEvent, Telemetry
|
from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
|
||||||
from llama_stack.apis.tools import (
|
from llama_stack.apis.tools import (
|
||||||
RAGDocument,
|
RAGDocument,
|
||||||
RAGQueryConfig,
|
RAGQueryConfig,
|
||||||
|
@ -206,12 +206,12 @@ class InferenceRouter(Inference):
|
||||||
completion_tokens: int,
|
completion_tokens: int,
|
||||||
total_tokens: int,
|
total_tokens: int,
|
||||||
model: Model,
|
model: Model,
|
||||||
) -> List[MetricEvent]:
|
) -> List[MetricInResponse]:
|
||||||
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
|
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
|
||||||
if self.telemetry:
|
if self.telemetry:
|
||||||
for metric in metrics:
|
for metric in metrics:
|
||||||
await self.telemetry.log_event(metric)
|
await self.telemetry.log_event(metric)
|
||||||
return metrics
|
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
|
||||||
|
|
||||||
async def _count_tokens(
|
async def _count_tokens(
|
||||||
self,
|
self,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue