feat: Add new compact MetricInResponse type (#1593)

# What does this PR do?
This change adds a compact type to include metrics in response as
opposed to the full MetricEvent which is relevant for internal logging
purposes.

## Test Plan
```
LLAMA_STACK_CONFIG=~/.llama/distributions/fireworks/fireworks-run.yaml pytest -s -v agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct

 llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml

curl --request POST \
  --url http://localhost:8321/v1/inference/chat-completion \
  --header 'content-type: application/json' \
  --data '{
  "model_id": "meta-llama/Llama-3.1-70B-Instruct",
  "messages": [
    {
      "role": "user",
      "content": {
        "type": "text",
        "text": "where do humans live"
      }
    }
  ],
  "stream": false
}'

{
  "metrics": [
    {
      "metric": "prompt_tokens",
      "value": 10,
      "unit": null
    },
    {
      "metric": "completion_tokens",
      "value": 522,
      "unit": null
    },
    {
      "metric": "total_tokens",
      "value": 532,
      "unit": null
    }
  ],
  "completion_message": {
    "role": "assistant",
    "content": "Humans live in various parts of the world...............",
    "stop_reason": "out_of_tokens",
    "tool_calls": []
  },
  "logprobs": null
}
```
This commit is contained in:
Dinesh Yeduguru 2025-03-12 15:45:44 -07:00 committed by GitHub
parent ad939c97c3
commit 99bbe0e70b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 150 additions and 80 deletions

View file

@ -4549,7 +4549,7 @@
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricEvent"
"$ref": "#/components/schemas/MetricInResponse"
}
},
"completion_message": {
@ -4571,46 +4571,9 @@
"title": "ChatCompletionResponse",
"description": "Response from a chat completion request."
},
"MetricEvent": {
"MetricInResponse": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": {
"type": "string"
},
@ -4630,15 +4593,10 @@
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
"value"
],
"title": "MetricEvent"
"title": "MetricInResponse"
},
"TokenLogProbs": {
"type": "object",
@ -4715,6 +4673,12 @@
"CompletionResponse": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"content": {
"type": "string",
"description": "The generated completion text"
@ -4924,7 +4888,7 @@
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricEvent"
"$ref": "#/components/schemas/MetricInResponse"
}
},
"event": {
@ -5082,6 +5046,12 @@
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
}
},
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
@ -8363,6 +8333,75 @@
],
"title": "LogSeverity"
},
"MetricEvent": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"timestamp": {
"type": "string",
"format": "date-time"
},
"attributes": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "string"
},
{
"type": "integer"
},
{
"type": "number"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
},
"type": {
"type": "string",
"const": "metric",
"default": "metric"
},
"metric": {
"type": "string"
},
"value": {
"oneOf": [
{
"type": "integer"
},
{
"type": "number"
}
]
},
"unit": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"trace_id",
"span_id",
"timestamp",
"type",
"metric",
"value",
"unit"
],
"title": "MetricEvent"
},
"SpanEndPayload": {
"type": "object",
"properties": {

View file

@ -3101,7 +3101,7 @@ components:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricEvent'
$ref: '#/components/schemas/MetricInResponse'
completion_message:
$ref: '#/components/schemas/CompletionMessage'
description: The complete response message
@ -3116,29 +3116,9 @@ components:
- completion_message
title: ChatCompletionResponse
description: Response from a chat completion request.
MetricEvent:
MetricInResponse:
type: object
properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric:
type: string
value:
@ -3149,14 +3129,9 @@ components:
type: string
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
title: MetricInResponse
TokenLogProbs:
type: object
properties:
@ -3213,6 +3188,10 @@ components:
CompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
content:
type: string
description: The generated completion text
@ -3412,7 +3391,7 @@ components:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricEvent'
$ref: '#/components/schemas/MetricInResponse'
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
description: The event containing the new content
@ -3531,6 +3510,10 @@ components:
CompletionResponseStreamChunk:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
delta:
type: string
description: >-
@ -5703,6 +5686,47 @@ components:
- error
- critical
title: LogSeverity
MetricEvent:
type: object
properties:
trace_id:
type: string
span_id:
type: string
timestamp:
type: string
format: date-time
attributes:
type: object
additionalProperties:
oneOf:
- type: string
- type: integer
- type: number
- type: boolean
- type: 'null'
type:
type: string
const: metric
default: metric
metric:
type: string
value:
oneOf:
- type: integer
- type: number
unit:
type: string
additionalProperties: false
required:
- trace_id
- span_id
- timestamp
- type
- metric
- value
- unit
title: MetricEvent
SpanEndPayload:
type: object
properties: