feat: Add new compact MetricInResponse type (#1593)

# What does this PR do?
This change adds a compact type for including metrics in responses, as
opposed to the full MetricEvent, which is relevant for internal logging
purposes.

## Test Plan
```
LLAMA_STACK_CONFIG=~/.llama/distributions/fireworks/fireworks-run.yaml pytest -s -v agents/test_agents.py --safety-shield meta-llama/Llama-Guard-3-8B --text-model meta-llama/Llama-3.1-8B-Instruct

 llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml

curl --request POST \
  --url http://localhost:8321/v1/inference/chat-completion \
  --header 'content-type: application/json' \
  --data '{
  "model_id": "meta-llama/Llama-3.1-70B-Instruct",
  "messages": [
    {
      "role": "user",
      "content": {
        "type": "text",
        "text": "where do humans live"
      }
    }
  ],
  "stream": false
}'

{
  "metrics": [
    {
      "metric": "prompt_tokens",
      "value": 10,
      "unit": null
    },
    {
      "metric": "completion_tokens",
      "value": 522,
      "unit": null
    },
    {
      "metric": "total_tokens",
      "value": 532,
      "unit": null
    }
  ],
  "completion_message": {
    "role": "assistant",
    "content": "Humans live in various parts of the world...............",
    "stop_reason": "out_of_tokens",
    "tool_calls": []
  },
  "logprobs": null
}
```
This commit is contained in:
Dinesh Yeduguru 2025-03-12 15:45:44 -07:00 committed by GitHub
parent ad939c97c3
commit 99bbe0e70b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 150 additions and 80 deletions

View file

@ -96,6 +96,13 @@ class MetricEvent(EventCommon):
unit: str
# Compact metric record for responses: a metric name, a numeric value, and an optional unit.
@json_schema_type
class MetricInResponse(BaseModel):
# Name of the metric, e.g. "prompt_tokens".
metric: str
# Numeric reading; either an int or a float is accepted.
value: Union[int, float]
# Unit of the value; optional, defaults to None when not supplied.
unit: Optional[str] = None
# This is a short-term solution to allow the inference API to return metrics.
# The ideal way to do this is to have a way for all response types to include metrics
# and for all metric events logged to the telemetry API to be included with the response.
@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
# Base model declaring an optional `metrics` list (defaults to None); by its name it is
# meant to be mixed into response types.
class MetricResponseMixin(BaseModel):
# NOTE(review): this is a diff rendering without +/- markers. The first `metrics` line
# appears to be the removed declaration and the second its replacement; confirm against the commit.
metrics: Optional[List[MetricEvent]] = None
metrics: Optional[List[MetricInResponse]] = None
@json_schema_type