diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index ea7a8f210..b93f6a380 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -2699,8 +2699,7 @@ "auto", "required" ], - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.", - "default": "auto" + "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model." }, "tool_prompt_format": { "type": "string", @@ -2815,6 +2814,11 @@ "BenchmarkConfig": { "type": "object", "properties": { + "type": { + "type": "string", + "const": "benchmark", + "default": "benchmark" + }, "eval_candidate": { "$ref": "#/components/schemas/EvalCandidate" }, @@ -2830,6 +2834,7 @@ }, "additionalProperties": false, "required": [ + "type", "eval_candidate", "scoring_params" ] @@ -3498,17 +3503,7 @@ "ScoringResult": { "type": "object", "properties": { - "metrics": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricEvent" - } - }, - "completion_message": { - "$ref": "#/components/schemas/CompletionMessage", - "description": "The complete response message" - }, - "logprobs": { + "score_rows": { "type": "array", "items": { "type": "object", @@ -3568,75 +3563,7 @@ "aggregated_results" ] }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string" - }, - "span_id": { - "type": "string" - }, - "timestamp": { - "type": "string", - "format": "date-time" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - } - }, - "type": { - "type": "string", - "const": "metric", - "default": "metric" - }, - "metric": { - "type": "string" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ] - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ] - }, - "TokenLogProbs": { + "Benchmark": { "type": "object", "properties": { "identifier": { @@ -4225,6 +4152,12 @@ "ChatCompletionResponse": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricEvent" + } + }, "completion_message": { "$ref": "#/components/schemas/CompletionMessage", "description": "The complete response message" @@ -4243,6 +4176,74 @@ ], "description": "Response from a chat completion request." }, + "MetricEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + } + }, + "type": { + "type": "string", + "const": "metric", + "default": "metric" + }, + "metric": { + "type": "string" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ] + }, + "unit": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" + ] + }, "TokenLogProbs": { "type": "object", "properties": { @@ -4470,6 +4471,12 @@ "ChatCompletionResponseStreamChunk": { "type": "object", "properties": { + "metrics": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricEvent" + } + }, "event": { "$ref": "#/components/schemas/ChatCompletionResponseEvent", "description": "The event containing the new content" diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 19c646bf9..b30025020 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1641,7 +1641,6 @@ components: Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model. - default: auto tool_prompt_format: type: string enum: @@ -1713,6 +1712,10 @@ components: BenchmarkConfig: type: object properties: + type: + type: string + const: benchmark + default: benchmark eval_candidate: $ref: '#/components/schemas/EvalCandidate' scoring_params: @@ -1723,6 +1726,7 @@ components: type: integer additionalProperties: false required: + - type - eval_candidate - scoring_params EvalCandidate: @@ -2960,185 +2964,6 @@ components: - delta description: >- A chunk of a streamed completion response. - AgentConfig: - type: object - properties: - sampling_params: - $ref: '#/components/schemas/SamplingParams' - input_shields: - type: array - items: - type: string - output_shields: - type: array - items: - type: string - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - client_tools: - type: array - items: - $ref: '#/components/schemas/ToolDef' - tool_choice: - type: string - enum: - - auto - - required - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following capabilities - of the model. - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - description: >- - Prompt format for calling custom / zero shot tools. - tool_config: - $ref: '#/components/schemas/ToolConfig' - max_infer_iters: - type: integer - default: 10 - model: - type: string - instructions: - type: string - enable_session_persistence: - type: boolean - response_format: - $ref: '#/components/schemas/ResponseFormat' - additionalProperties: false - required: - - model - - instructions - - enable_session_persistence - AgentTool: - oneOf: - - type: string - - type: object - properties: - name: - type: string - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - args - ToolDef: - type: object - properties: - type: - type: string - const: text - default: text - text: - type: string - additionalProperties: false - required: - - type - - text - ToolCallDelta: - type: object - properties: - type: - type: string - const: tool_call - default: tool_call - tool_call: - oneOf: - - type: string - - $ref: '#/components/schemas/ToolCall' - parse_status: - type: string - enum: - - started - - in_progress - - failed - - succeeded - additionalProperties: false - required: - - type - - tool_call - - parse_status - CompletionRequest: - type: object - properties: - model_id: - type: string - description: >- - The identifier of the model to use. The model must be registered with - Llama Stack and available via the /models endpoint. - content: - $ref: '#/components/schemas/InterleavedContent' - description: The content to generate a completion for - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: >- - (Optional) Parameters to control the sampling strategy - response_format: - $ref: '#/components/schemas/ResponseFormat' - description: >- - (Optional) Grammar specification for guided (structured) decoding - stream: - type: boolean - description: >- - (Optional) If True, generate an SSE event stream of the response. Defaults - to False. - logprobs: - type: object - properties: - top_k: - type: integer - default: 0 - description: >- - How many tokens (for each position) to return log probabilities for. - additionalProperties: false - description: >- - (Optional) If specified, log probabilities for each token position will - be returned. - additionalProperties: false - required: - - model_id - - content - CompletionResponseStreamChunk: - type: object - properties: - delta: - type: string - description: >- - New content generated since last chunk. This can be one or more tokens. - stop_reason: - type: string - enum: - - end_of_turn - - end_of_message - - out_of_tokens - description: >- - Optional reason why generation stopped, if complete - logprobs: - type: array - items: - $ref: '#/components/schemas/TokenLogProbs' - description: >- - Optional log probabilities for generated tokens - additionalProperties: false - required: - - delta - description: >- - A chunk of a streamed completion response. CreateAgentRequest: type: object properties: