diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index ea7a8f210..b93f6a380 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2699,8 +2699,7 @@
"auto",
"required"
],
- "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
- "default": "auto"
+ "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
},
"tool_prompt_format": {
"type": "string",
@@ -2815,6 +2814,11 @@
"BenchmarkConfig": {
"type": "object",
"properties": {
+ "type": {
+ "type": "string",
+ "const": "benchmark",
+ "default": "benchmark"
+ },
"eval_candidate": {
"$ref": "#/components/schemas/EvalCandidate"
},
@@ -2830,6 +2834,7 @@
},
"additionalProperties": false,
"required": [
+ "type",
"eval_candidate",
"scoring_params"
]
@@ -3498,17 +3503,7 @@
"ScoringResult": {
"type": "object",
"properties": {
- "metrics": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/MetricEvent"
- }
- },
- "completion_message": {
- "$ref": "#/components/schemas/CompletionMessage",
- "description": "The complete response message"
- },
- "logprobs": {
+ "score_rows": {
"type": "array",
"items": {
"type": "object",
@@ -3568,75 +3563,7 @@
"aggregated_results"
]
},
- "MetricEvent": {
- "type": "object",
- "properties": {
- "trace_id": {
- "type": "string"
- },
- "span_id": {
- "type": "string"
- },
- "timestamp": {
- "type": "string",
- "format": "date-time"
- },
- "attributes": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "integer"
- },
- {
- "type": "number"
- },
- {
- "type": "boolean"
- },
- {
- "type": "null"
- }
- ]
- }
- },
- "type": {
- "type": "string",
- "const": "metric",
- "default": "metric"
- },
- "metric": {
- "type": "string"
- },
- "value": {
- "oneOf": [
- {
- "type": "integer"
- },
- {
- "type": "number"
- }
- ]
- },
- "unit": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "trace_id",
- "span_id",
- "timestamp",
- "type",
- "metric",
- "value",
- "unit"
- ]
- },
- "TokenLogProbs": {
+ "Benchmark": {
"type": "object",
"properties": {
"identifier": {
@@ -4225,6 +4152,12 @@
"ChatCompletionResponse": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"completion_message": {
"$ref": "#/components/schemas/CompletionMessage",
"description": "The complete response message"
@@ -4243,6 +4176,74 @@
],
"description": "Response from a chat completion request."
},
+ "MetricEvent": {
+ "type": "object",
+ "properties": {
+ "trace_id": {
+ "type": "string"
+ },
+ "span_id": {
+ "type": "string"
+ },
+ "timestamp": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "attributes": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "metric",
+ "default": "metric"
+ },
+ "metric": {
+ "type": "string"
+ },
+ "value": {
+ "oneOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "number"
+ }
+ ]
+ },
+ "unit": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "trace_id",
+ "span_id",
+ "timestamp",
+ "type",
+ "metric",
+ "value",
+ "unit"
+ ]
+ },
"TokenLogProbs": {
"type": "object",
"properties": {
@@ -4470,6 +4471,12 @@
"ChatCompletionResponseStreamChunk": {
"type": "object",
"properties": {
+ "metrics": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MetricEvent"
+ }
+ },
"event": {
"$ref": "#/components/schemas/ChatCompletionResponseEvent",
"description": "The event containing the new content"
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 19c646bf9..b30025020 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1641,7 +1641,6 @@ components:
Whether tool use is required or automatic. This is a hint to the model
which may not be followed. It depends on the Instruction Following capabilities
of the model.
- default: auto
tool_prompt_format:
type: string
enum:
@@ -1713,6 +1712,10 @@ components:
BenchmarkConfig:
type: object
properties:
+ type:
+ type: string
+ const: benchmark
+ default: benchmark
eval_candidate:
$ref: '#/components/schemas/EvalCandidate'
scoring_params:
@@ -1723,6 +1726,7 @@ components:
type: integer
additionalProperties: false
required:
+ - type
- eval_candidate
- scoring_params
EvalCandidate:
@@ -2960,185 +2964,6 @@ components:
- delta
description: >-
A chunk of a streamed completion response.
- AgentConfig:
- type: object
- properties:
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- input_shields:
- type: array
- items:
- type: string
- output_shields:
- type: array
- items:
- type: string
- toolgroups:
- type: array
- items:
- $ref: '#/components/schemas/AgentTool'
- client_tools:
- type: array
- items:
- $ref: '#/components/schemas/ToolDef'
- tool_choice:
- type: string
- enum:
- - auto
- - required
- description: >-
- Whether tool use is required or automatic. This is a hint to the model
- which may not be followed. It depends on the Instruction Following capabilities
- of the model.
- tool_prompt_format:
- type: string
- enum:
- - json
- - function_tag
- - python_list
- description: >-
- Prompt format for calling custom / zero shot tools.
- tool_config:
- $ref: '#/components/schemas/ToolConfig'
- max_infer_iters:
- type: integer
- default: 10
- model:
- type: string
- instructions:
- type: string
- enable_session_persistence:
- type: boolean
- response_format:
- $ref: '#/components/schemas/ResponseFormat'
- additionalProperties: false
- required:
- - model
- - instructions
- - enable_session_persistence
- AgentTool:
- oneOf:
- - type: string
- - type: object
- properties:
- name:
- type: string
- args:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- - args
- ToolDef:
- type: object
- properties:
- type:
- type: string
- const: text
- default: text
- text:
- type: string
- additionalProperties: false
- required:
- - type
- - text
- ToolCallDelta:
- type: object
- properties:
- type:
- type: string
- const: tool_call
- default: tool_call
- tool_call:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/ToolCall'
- parse_status:
- type: string
- enum:
- - started
- - in_progress
- - failed
- - succeeded
- additionalProperties: false
- required:
- - type
- - tool_call
- - parse_status
- CompletionRequest:
- type: object
- properties:
- model_id:
- type: string
- description: >-
- The identifier of the model to use. The model must be registered with
- Llama Stack and available via the /models endpoint.
- content:
- $ref: '#/components/schemas/InterleavedContent'
- description: The content to generate a completion for
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- description: >-
- (Optional) Parameters to control the sampling strategy
- response_format:
- $ref: '#/components/schemas/ResponseFormat'
- description: >-
- (Optional) Grammar specification for guided (structured) decoding
- stream:
- type: boolean
- description: >-
- (Optional) If True, generate an SSE event stream of the response. Defaults
- to False.
- logprobs:
- type: object
- properties:
- top_k:
- type: integer
- default: 0
- description: >-
- How many tokens (for each position) to return log probabilities for.
- additionalProperties: false
- description: >-
- (Optional) If specified, log probabilities for each token position will
- be returned.
- additionalProperties: false
- required:
- - model_id
- - content
- CompletionResponseStreamChunk:
- type: object
- properties:
- delta:
- type: string
- description: >-
- New content generated since last chunk. This can be one or more tokens.
- stop_reason:
- type: string
- enum:
- - end_of_turn
- - end_of_message
- - out_of_tokens
- description: >-
- Optional reason why generation stopped, if complete
- logprobs:
- type: array
- items:
- $ref: '#/components/schemas/TokenLogProbs'
- description: >-
- Optional log probabilities for generated tokens
- additionalProperties: false
- required:
- - delta
- description: >-
- A chunk of a streamed completion response.
CreateAgentRequest:
type: object
properties: