forked from phoenix-oss/llama-stack-mirror
docs: api documentation for agents/eval/scoring/datasets (#1400)
# What does this PR do?

- add some docs to OpenAPI for agents/eval/scoring/datasetio

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

- read

[//]: # (## Documentation)
This commit is contained in:
parent
0d18274d34
commit
3d9331840e
6 changed files with 586 additions and 137 deletions
195
docs/_static/llama-stack-spec.yaml
vendored
195
docs/_static/llama-stack-spec.yaml
vendored
|
@ -31,25 +31,32 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: ''
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: query
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: rows_in_page
|
||||
in: query
|
||||
description: The number of rows to get per page.
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: page_token
|
||||
in: query
|
||||
description: The token to get the next page of rows.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: filter_condition
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) A condition to filter the rows by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
|
@ -234,7 +241,8 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
An AgentCreateResponse with the agent ID.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -251,7 +259,8 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: >-
|
||||
Create an agent with the given configuration.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -263,7 +272,7 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: An AgentSessionCreateResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -280,10 +289,12 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Create a new session for an agent.
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the agent to create the session for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -298,8 +309,8 @@ paths:
|
|||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A single turn in an interaction with an Agentic System. **OR** streamed
|
||||
agent turn completion response.
|
||||
If stream=False, returns a Turn object. If stream=True, returns an SSE
|
||||
event stream of AgentTurnResponseStreamChunk
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -319,15 +330,19 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Create a new turn for an agent.
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the agent to create the turn for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: session_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the session to create the turn for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -411,10 +426,11 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Delete an agent by its ID.
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: The ID of the agent to delete.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -439,20 +455,25 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Retrieve an agent session by its ID.
|
||||
parameters:
|
||||
- name: session_id
|
||||
in: path
|
||||
description: The ID of the session to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the agent to get the session for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: turn_ids
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) List of turn IDs to filter the session by.
|
||||
required: false
|
||||
schema:
|
||||
type: array
|
||||
|
@ -474,15 +495,18 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Delete an agent session by its ID.
|
||||
parameters:
|
||||
- name: session_id
|
||||
in: path
|
||||
description: The ID of the session to delete.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the agent to delete the session for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -596,7 +620,8 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
EvaluateResponse object containing generations and scores
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -613,10 +638,12 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
description: Evaluate a list of rows on a benchmark.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -630,7 +657,7 @@ paths:
|
|||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: An AgentStepResponse.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -647,25 +674,30 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Retrieve an agent step by its ID.
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: The ID of the agent to get the step for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: session_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the session to get the step for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: turn_id
|
||||
in: path
|
||||
description: The ID of the turn to get the step for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: step_id
|
||||
in: path
|
||||
description: The ID of the step to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -673,7 +705,7 @@ paths:
|
|||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: A Turn.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -690,20 +722,24 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
description: Retrieve an agent turn by its ID.
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
description: The ID of the agent to get the turn for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: session_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the session to get the turn for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: turn_id
|
||||
in: path
|
||||
description: The ID of the turn to get.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -1391,7 +1427,7 @@ paths:
|
|||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: The status of the evaluation job.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -1410,15 +1446,18 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
description: Get the status of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the status of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -1438,15 +1477,18 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
description: Cancel a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -1454,7 +1496,7 @@ paths:
|
|||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: The result of the job.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -1471,15 +1513,18 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
description: Get the result of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the result of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -2192,7 +2237,8 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
The job that was created to run the evaluation.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -2209,10 +2255,12 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: ''
|
||||
description: Run an evaluation on a benchmark.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
|
@ -2280,7 +2328,8 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
ScoreResponse object containing rows and aggregated results
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
@ -2297,7 +2346,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Scoring
|
||||
description: ''
|
||||
description: Score a list of rows.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -3567,6 +3616,7 @@ components:
|
|||
properties:
|
||||
agent_config:
|
||||
$ref: '#/components/schemas/AgentConfig'
|
||||
description: The configuration for the agent.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- agent_config
|
||||
|
@ -3585,6 +3635,7 @@ components:
|
|||
properties:
|
||||
session_name:
|
||||
type: string
|
||||
description: The name of the session to create.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- session_name
|
||||
|
@ -3607,8 +3658,12 @@ components:
|
|||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
description: List of messages to start the turn with.
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) If True, generate an SSE event stream of the response. Defaults
|
||||
to False.
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
|
@ -3622,19 +3677,30 @@ components:
|
|||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
description: The content of the document.
|
||||
mime_type:
|
||||
type: string
|
||||
description: The MIME type of the document.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- mime_type
|
||||
title: Document
|
||||
description: A document to be used by an agent.
|
||||
description: >-
|
||||
(Optional) List of documents to create the turn with.
|
||||
toolgroups:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AgentTool'
|
||||
description: >-
|
||||
(Optional) List of toolgroups to create the turn with, will be used in
|
||||
addition to the agent's config toolgroups for the request.
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
description: >-
|
||||
(Optional) The tool configuration to create the turn with, will be used
|
||||
to override the agent's tool_config.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- messages
|
||||
|
@ -3644,20 +3710,25 @@ components:
|
|||
properties:
|
||||
turn_id:
|
||||
type: string
|
||||
description: The ID of the turn.
|
||||
step_id:
|
||||
type: string
|
||||
description: The ID of the step.
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step started.
|
||||
completed_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step completed.
|
||||
step_type:
|
||||
type: string
|
||||
const: inference
|
||||
default: inference
|
||||
model_response:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
description: The response from the LLM.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- turn_id
|
||||
|
@ -3665,27 +3736,36 @@ components:
|
|||
- step_type
|
||||
- model_response
|
||||
title: InferenceStep
|
||||
description: An inference step in an agent turn.
|
||||
MemoryRetrievalStep:
|
||||
type: object
|
||||
properties:
|
||||
turn_id:
|
||||
type: string
|
||||
description: The ID of the turn.
|
||||
step_id:
|
||||
type: string
|
||||
description: The ID of the step.
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step started.
|
||||
completed_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step completed.
|
||||
step_type:
|
||||
type: string
|
||||
const: memory_retrieval
|
||||
default: memory_retrieval
|
||||
vector_db_ids:
|
||||
type: string
|
||||
description: >-
|
||||
The IDs of the vector databases to retrieve context from.
|
||||
inserted_context:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The context retrieved from the vector databases.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- turn_id
|
||||
|
@ -3694,6 +3774,8 @@ components:
|
|||
- vector_db_ids
|
||||
- inserted_context
|
||||
title: MemoryRetrievalStep
|
||||
description: >-
|
||||
A memory retrieval step in an agent turn.
|
||||
SafetyViolation:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3721,39 +3803,49 @@ components:
|
|||
properties:
|
||||
turn_id:
|
||||
type: string
|
||||
description: The ID of the turn.
|
||||
step_id:
|
||||
type: string
|
||||
description: The ID of the step.
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step started.
|
||||
completed_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step completed.
|
||||
step_type:
|
||||
type: string
|
||||
const: shield_call
|
||||
default: shield_call
|
||||
violation:
|
||||
$ref: '#/components/schemas/SafetyViolation'
|
||||
description: The violation from the shield call.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- turn_id
|
||||
- step_id
|
||||
- step_type
|
||||
title: ShieldCallStep
|
||||
description: A shield call step in an agent turn.
|
||||
ToolExecutionStep:
|
||||
type: object
|
||||
properties:
|
||||
turn_id:
|
||||
type: string
|
||||
description: The ID of the turn.
|
||||
step_id:
|
||||
type: string
|
||||
description: The ID of the step.
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step started.
|
||||
completed_at:
|
||||
type: string
|
||||
format: date-time
|
||||
description: The time the step completed.
|
||||
step_type:
|
||||
type: string
|
||||
const: tool_execution
|
||||
|
@ -3762,10 +3854,12 @@ components:
|
|||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolCall'
|
||||
description: The tool calls to execute.
|
||||
tool_responses:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolResponse'
|
||||
description: The tool responses from the tool calls.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- turn_id
|
||||
|
@ -3774,6 +3868,7 @@ components:
|
|||
- tool_calls
|
||||
- tool_responses
|
||||
title: ToolExecutionStep
|
||||
description: A tool execution step in an agent turn.
|
||||
ToolResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3850,13 +3945,16 @@ components:
|
|||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
description: The content of the attachment.
|
||||
mime_type:
|
||||
type: string
|
||||
description: The MIME type of the attachment.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- mime_type
|
||||
title: Attachment
|
||||
description: An attachment to an agent turn.
|
||||
started_at:
|
||||
type: string
|
||||
format: date-time
|
||||
|
@ -3922,6 +4020,7 @@ components:
|
|||
- shield_call
|
||||
- memory_retrieval
|
||||
title: StepType
|
||||
description: Type of the step in an agent turn.
|
||||
step_id:
|
||||
type: string
|
||||
step_details:
|
||||
|
@ -3959,6 +4058,7 @@ components:
|
|||
- shield_call
|
||||
- memory_retrieval
|
||||
title: StepType
|
||||
description: Type of the step in an agent turn.
|
||||
step_id:
|
||||
type: string
|
||||
delta:
|
||||
|
@ -3985,6 +4085,7 @@ components:
|
|||
- shield_call
|
||||
- memory_retrieval
|
||||
title: StepType
|
||||
description: Type of the step in an agent turn.
|
||||
step_id:
|
||||
type: string
|
||||
metadata:
|
||||
|
@ -4212,11 +4313,14 @@ components:
|
|||
default: agent
|
||||
config:
|
||||
$ref: '#/components/schemas/AgentConfig'
|
||||
description: >-
|
||||
The configuration for the agent candidate.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- config
|
||||
title: AgentCandidate
|
||||
description: An agent candidate for evaluation.
|
||||
AggregationFunctionType:
|
||||
type: string
|
||||
enum:
|
||||
|
@ -4245,17 +4349,26 @@ components:
|
|||
properties:
|
||||
eval_candidate:
|
||||
$ref: '#/components/schemas/EvalCandidate'
|
||||
description: The candidate to evaluate.
|
||||
scoring_params:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
description: >-
|
||||
Map between scoring function id and parameters for each scoring function
|
||||
you want to run
|
||||
num_examples:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of examples to evaluate. If not provided, all examples
|
||||
in the dataset will be evaluated
|
||||
additionalProperties: false
|
||||
required:
|
||||
- eval_candidate
|
||||
- scoring_params
|
||||
title: BenchmarkConfig
|
||||
description: >-
|
||||
A benchmark configuration for evaluation.
|
||||
EvalCandidate:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
|
@ -4298,16 +4411,22 @@ components:
|
|||
default: model
|
||||
model:
|
||||
type: string
|
||||
description: The model ID to evaluate.
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: The sampling parameters for the model.
|
||||
system_message:
|
||||
$ref: '#/components/schemas/SystemMessage'
|
||||
description: >-
|
||||
(Optional) The system message providing instructions or context to the
|
||||
model.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- model
|
||||
- sampling_params
|
||||
title: ModelCandidate
|
||||
description: A model candidate for evaluation.
|
||||
RegexParserScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4353,12 +4472,16 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows to evaluate.
|
||||
scoring_functions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
The scoring functions to use for the evaluation.
|
||||
benchmark_config:
|
||||
$ref: '#/components/schemas/BenchmarkConfig'
|
||||
description: The configuration for the benchmark.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_rows
|
||||
|
@ -4380,15 +4503,18 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The generations from the evaluation.
|
||||
scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: The scores from the evaluation.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
title: EvaluateResponse
|
||||
description: The response from an evaluation.
|
||||
ScoringResult:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4404,6 +4530,8 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The scoring result for each row. Each row is a map of column name to value.
|
||||
aggregated_results:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -4414,11 +4542,13 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Map of metric name to aggregated value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- score_rows
|
||||
- aggregated_results
|
||||
title: ScoringResult
|
||||
description: A scoring result for a single scoring function, containing per-row scores and aggregated results.
|
||||
Session:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4731,15 +4861,19 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
total_count:
|
||||
type: integer
|
||||
description: The total number of rows in the dataset.
|
||||
next_page_token:
|
||||
type: string
|
||||
description: The token to get the next page of rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- rows
|
||||
- total_count
|
||||
title: PaginatedRowsResult
|
||||
description: A paginated list of rows from a dataset.
|
||||
ScoringFn:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6170,6 +6304,7 @@ components:
|
|||
properties:
|
||||
benchmark_config:
|
||||
$ref: '#/components/schemas/BenchmarkConfig'
|
||||
description: The configuration for the benchmark.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- benchmark_config
|
||||
|
@ -6251,12 +6386,15 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows to score.
|
||||
scoring_functions:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ScoringFnParams'
|
||||
- type: 'null'
|
||||
description: >-
|
||||
The scoring functions to use for the scoring.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_rows
|
||||
|
@ -6269,10 +6407,13 @@ components:
|
|||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
A map of scoring function name to ScoringResult.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- results
|
||||
title: ScoreResponse
|
||||
description: The response from scoring.
|
||||
ScoreBatchRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6543,6 +6684,8 @@ tags:
|
|||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
- name: Eval
|
||||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files (Coming Soon)
|
||||
- name: Inference
|
||||
description: >-
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue