mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-23 04:53:14 +00:00
feat: Add new compact MetricInResponse type (#1593)
# What does this PR do?

This change adds a compact type for including metrics in responses, as opposed to the full MetricEvent, which is relevant for internal logging purposes.

## Test Plan

```
LLAMA_STACK_CONFIG=~/.llama/distributions/fireworks/fireworks-run.yaml pytest -s -v agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct

llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml

curl --request POST \
  --url http://localhost:8321/v1/inference/chat-completion \
  --header 'content-type: application/json' \
  --data '{
    "model_id": "meta-llama/Llama-3.1-70B-Instruct",
    "messages": [
      {
        "role": "user",
        "content": {
          "type": "text",
          "text": "where do humans live"
        }
      }
    ],
    "stream": false
  }'

{
  "metrics": [
    { "metric": "prompt_tokens", "value": 10, "unit": null },
    { "metric": "completion_tokens", "value": 522, "unit": null },
    { "metric": "total_tokens", "value": 532, "unit": null }
  ],
  "completion_message": {
    "role": "assistant",
    "content": "Humans live in various parts of the world...............",
    "stop_reason": "out_of_tokens",
    "tool_calls": []
  },
  "logprobs": null
}
```
This commit is contained in:
parent
ad939c97c3
commit
99bbe0e70b
4 changed files with 150 additions and 80 deletions
82
docs/_static/llama-stack-spec.yaml
vendored
82
docs/_static/llama-stack-spec.yaml
vendored
|
@ -3101,7 +3101,7 @@ components:
|
|||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricEvent'
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
description: The complete response message
|
||||
|
@ -3116,29 +3116,9 @@ components:
|
|||
- completion_message
|
||||
title: ChatCompletionResponse
|
||||
description: Response from a chat completion request.
|
||||
MetricEvent:
|
||||
MetricInResponse:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
span_id:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
type:
|
||||
type: string
|
||||
const: metric
|
||||
default: metric
|
||||
metric:
|
||||
type: string
|
||||
value:
|
||||
|
@ -3149,14 +3129,9 @@ components:
|
|||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
title: MetricInResponse
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3213,6 +3188,10 @@ components:
|
|||
CompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
content:
|
||||
type: string
|
||||
description: The generated completion text
|
||||
|
@ -3412,7 +3391,7 @@ components:
|
|||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricEvent'
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
event:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
||||
description: The event containing the new content
|
||||
|
@ -3531,6 +3510,10 @@ components:
|
|||
CompletionResponseStreamChunk:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
delta:
|
||||
type: string
|
||||
description: >-
|
||||
|
@ -5703,6 +5686,47 @@ components:
|
|||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
MetricEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
span_id:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
type:
|
||||
type: string
|
||||
const: metric
|
||||
default: metric
|
||||
metric:
|
||||
type: string
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
unit:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
SpanEndPayload:
|
||||
type: object
|
||||
properties:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue