docs: api documentation for agents/eval/scoring/datasets (#1400)

# What does this PR do? - add some docs to OpenAPI for agents/eval/scoring/datasetio [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - read [//]: # (## Documentation)
2025-03-05 09:40:24 -08:00 · 2025-03-05 09:40:24 -08:00 · 3d9331840e
commit 3d9331840e
parent 0d18274d34
6 changed files with 586 additions and 137 deletions
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@ -31,25 +31,32 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - DatasetIO
-      description: ''
+      description: >-
+        Get a paginated list of rows from a dataset.
      parameters:
        - name: dataset_id
          in: query
+          description: >-
+            The ID of the dataset to get the rows from.
          required: true
          schema:
            type: string
        - name: rows_in_page
          in: query
+          description: The number of rows to get per page.
          required: true
          schema:
            type: integer
        - name: page_token
          in: query
+          description: The token to get the next page of rows.
          required: false
          schema:
            type: string
        - name: filter_condition
          in: query
+          description: >-
+            (Optional) A condition to filter the rows by.
          required: false
          schema:
            type: string
@ -234,7 +241,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            An AgentCreateResponse with the agent ID.
          content:
            application/json:
              schema:
@ -251,7 +259,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: >-
+        Create an agent with the given configuration.
      parameters: []
      requestBody:
        content:
@ -263,7 +272,7 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: An AgentSessionCreateResponse.
          content:
            application/json:
              schema:
@ -280,10 +289,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Create a new session for an agent.
      parameters:
        - name: agent_id
          in: path
+          description: >-
+            The ID of the agent to create the session for.
          required: true
          schema:
            type: string
@ -298,8 +309,8 @@ paths:
      responses:
        '200':
          description: >-
-            A single turn in an interaction with an Agentic System. **OR** streamed
-            agent turn completion response.
+            If stream=False, returns a Turn object. If stream=True, returns an SSE
+            event stream of AgentTurnResponseStreamChunk objects.
          content:
            application/json:
              schema:
@ -319,15 +330,19 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Create a new turn for an agent.
      parameters:
        - name: agent_id
          in: path
+          description: >-
+            The ID of the agent to create the turn for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
+          description: >-
+            The ID of the session to create the turn for.
          required: true
          schema:
            type: string
@ -411,10 +426,11 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Delete an agent by its ID.
      parameters:
        - name: agent_id
          in: path
+          description: The ID of the agent to delete.
          required: true
          schema:
            type: string
@ -439,20 +455,25 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent session by its ID.
      parameters:
        - name: session_id
          in: path
+          description: The ID of the session to get.
          required: true
          schema:
            type: string
        - name: agent_id
          in: path
+          description: >-
+            The ID of the agent to get the session for.
          required: true
          schema:
            type: string
        - name: turn_ids
          in: query
+          description: >-
+            (Optional) List of turn IDs to filter the session by.
          required: false
          schema:
            type: array
@ -474,15 +495,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Delete an agent session by its ID.
      parameters:
        - name: session_id
          in: path
+          description: The ID of the session to delete.
          required: true
          schema:
            type: string
        - name: agent_id
          in: path
+          description: >-
+            The ID of the agent to delete the session for.
          required: true
          schema:
            type: string
@ -596,7 +620,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            An EvaluateResponse object containing generations and scores.
          content:
            application/json:
              schema:
@ -613,10 +638,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Evaluate a list of rows on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@ -630,7 +657,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: An AgentStepResponse.
          content:
            application/json:
              schema:
@ -647,25 +674,30 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent step by its ID.
      parameters:
        - name: agent_id
          in: path
+          description: The ID of the agent to get the step for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
+          description: >-
+            The ID of the session to get the step for.
          required: true
          schema:
            type: string
        - name: turn_id
          in: path
+          description: The ID of the turn to get the step for.
          required: true
          schema:
            type: string
        - name: step_id
          in: path
+          description: The ID of the step to get.
          required: true
          schema:
            type: string
@ -673,7 +705,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: A Turn.
          content:
            application/json:
              schema:
@ -690,20 +722,24 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Agents
-      description: ''
+      description: Retrieve an agent turn by its ID.
      parameters:
        - name: agent_id
          in: path
+          description: The ID of the agent to get the turn for.
          required: true
          schema:
            type: string
        - name: session_id
          in: path
+          description: >-
+            The ID of the session to get the turn for.
          required: true
          schema:
            type: string
        - name: turn_id
          in: path
+          description: The ID of the turn to get.
          required: true
          schema:
            type: string
@ -1391,7 +1427,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The status of the evaluation job.
          content:
            application/json:
              schema:
@ -1410,15 +1446,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the status of a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark that the job belongs to.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to get the status of.
          required: true
          schema:
            type: string
@ -1438,15 +1477,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Cancel a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark that the job belongs to.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to cancel.
          required: true
          schema:
            type: string
@ -1454,7 +1496,7 @@ paths:
    get:
      responses:
        '200':
-          description: OK
+          description: The result of the job.
          content:
            application/json:
              schema:
@ -1471,15 +1513,18 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Get the result of a job.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark that the job belongs to.
          required: true
          schema:
            type: string
        - name: job_id
          in: path
+          description: The ID of the job to get the result of.
          required: true
          schema:
            type: string
@ -2192,7 +2237,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            The job that was created to run the evaluation.
          content:
            application/json:
              schema:
@ -2209,10 +2255,12 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Eval
-      description: ''
+      description: Run an evaluation on a benchmark.
      parameters:
        - name: benchmark_id
          in: path
+          description: >-
+            The ID of the benchmark to run the evaluation on.
          required: true
          schema:
            type: string
@ -2280,7 +2328,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            A ScoreResponse object containing rows and aggregated results.
          content:
            application/json:
              schema:
@ -2297,7 +2346,7 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Scoring
-      description: ''
+      description: Score a list of rows.
      parameters: []
      requestBody:
        content:
@ -3567,6 +3616,7 @@ components:
      properties:
        agent_config:
          $ref: '#/components/schemas/AgentConfig'
+          description: The configuration for the agent.
      additionalProperties: false
      required:
        - agent_config
@ -3585,6 +3635,7 @@ components:
      properties:
        session_name:
          type: string
+          description: The name of the session to create.
      additionalProperties: false
      required:
        - session_name
@ -3607,8 +3658,12 @@ components:
            oneOf:
              - $ref: '#/components/schemas/UserMessage'
              - $ref: '#/components/schemas/ToolResponseMessage'
+          description: List of messages to start the turn with.
        stream:
          type: boolean
+          description: >-
+            (Optional) If True, generate an SSE event stream of the response. Defaults
+            to False.
        documents:
          type: array
          items:
@ -3622,19 +3677,30 @@ components:
                    items:
                      $ref: '#/components/schemas/InterleavedContentItem'
                  - $ref: '#/components/schemas/URL'
+                description: The content of the document.
              mime_type:
                type: string
+                description: The MIME type of the document.
            additionalProperties: false
            required:
              - content
              - mime_type
            title: Document
+            description: A document to be used by an agent.
+          description: >-
+            (Optional) List of documents to create the turn with.
        toolgroups:
          type: array
          items:
            $ref: '#/components/schemas/AgentTool'
+          description: >-
+            (Optional) List of toolgroups to create the turn with. These are used in
+            addition to the agent's config toolgroups for the request.
        tool_config:
          $ref: '#/components/schemas/ToolConfig'
+          description: >-
+            (Optional) The tool configuration to create the turn with. It is used
+            to override the agent's tool_config.
      additionalProperties: false
      required:
        - messages
@ -3644,20 +3710,25 @@ components:
      properties:
        turn_id:
          type: string
+          description: The ID of the turn.
        step_id:
          type: string
+          description: The ID of the step.
        started_at:
          type: string
          format: date-time
+          description: The time the step started.
        completed_at:
          type: string
          format: date-time
+          description: The time the step completed.
        step_type:
          type: string
          const: inference
          default: inference
        model_response:
          $ref: '#/components/schemas/CompletionMessage'
+          description: The response from the LLM.
      additionalProperties: false
      required:
        - turn_id
@ -3665,27 +3736,36 @@ components:
        - step_type
        - model_response
      title: InferenceStep
+      description: An inference step in an agent turn.
    MemoryRetrievalStep:
      type: object
      properties:
        turn_id:
          type: string
+          description: The ID of the turn.
        step_id:
          type: string
+          description: The ID of the step.
        started_at:
          type: string
          format: date-time
+          description: The time the step started.
        completed_at:
          type: string
          format: date-time
+          description: The time the step completed.
        step_type:
          type: string
          const: memory_retrieval
          default: memory_retrieval
        vector_db_ids:
          type: string
+          description: >-
+            The IDs of the vector databases to retrieve context from.
        inserted_context:
          $ref: '#/components/schemas/InterleavedContent'
+          description: >-
+            The context retrieved from the vector databases.
      additionalProperties: false
      required:
        - turn_id
@ -3694,6 +3774,8 @@ components:
        - vector_db_ids
        - inserted_context
      title: MemoryRetrievalStep
+      description: >-
+        A memory retrieval step in an agent turn.
    SafetyViolation:
      type: object
      properties:
@ -3721,39 +3803,49 @@ components:
      properties:
        turn_id:
          type: string
+          description: The ID of the turn.
        step_id:
          type: string
+          description: The ID of the step.
        started_at:
          type: string
          format: date-time
+          description: The time the step started.
        completed_at:
          type: string
          format: date-time
+          description: The time the step completed.
        step_type:
          type: string
          const: shield_call
          default: shield_call
        violation:
          $ref: '#/components/schemas/SafetyViolation'
+          description: The violation from the shield call.
      additionalProperties: false
      required:
        - turn_id
        - step_id
        - step_type
      title: ShieldCallStep
+      description: A shield call step in an agent turn.
    ToolExecutionStep:
      type: object
      properties:
        turn_id:
          type: string
+          description: The ID of the turn.
        step_id:
          type: string
+          description: The ID of the step.
        started_at:
          type: string
          format: date-time
+          description: The time the step started.
        completed_at:
          type: string
          format: date-time
+          description: The time the step completed.
        step_type:
          type: string
          const: tool_execution
@ -3762,10 +3854,12 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/ToolCall'
+          description: The tool calls to execute.
        tool_responses:
          type: array
          items:
            $ref: '#/components/schemas/ToolResponse'
+          description: The tool responses from the tool calls.
      additionalProperties: false
      required:
        - turn_id
@ -3774,6 +3868,7 @@ components:
        - tool_calls
        - tool_responses
      title: ToolExecutionStep
+      description: A tool execution step in an agent turn.
    ToolResponse:
      type: object
      properties:
@ -3850,13 +3945,16 @@ components:
                    items:
                      $ref: '#/components/schemas/InterleavedContentItem'
                  - $ref: '#/components/schemas/URL'
+                description: The content of the attachment.
              mime_type:
                type: string
+                description: The MIME type of the attachment.
            additionalProperties: false
            required:
              - content
              - mime_type
            title: Attachment
+            description: An attachment to an agent turn.
        started_at:
          type: string
          format: date-time
@ -3922,6 +4020,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
+          description: Type of the step in an agent turn.
        step_id:
          type: string
        step_details:
@ -3959,6 +4058,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
+          description: Type of the step in an agent turn.
        step_id:
          type: string
        delta:
@ -3985,6 +4085,7 @@ components:
            - shield_call
            - memory_retrieval
          title: StepType
+          description: Type of the step in an agent turn.
        step_id:
          type: string
        metadata:
@ -4212,11 +4313,14 @@ components:
          default: agent
        config:
          $ref: '#/components/schemas/AgentConfig'
+          description: >-
+            The configuration for the agent candidate.
      additionalProperties: false
      required:
        - type
        - config
      title: AgentCandidate
+      description: An agent candidate for evaluation.
    AggregationFunctionType:
      type: string
      enum:
@ -4245,17 +4349,26 @@ components:
      properties:
        eval_candidate:
          $ref: '#/components/schemas/EvalCandidate'
+          description: The candidate to evaluate.
        scoring_params:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringFnParams'
+          description: >-
+            Map from scoring function ID to the parameters for each scoring function
+            you want to run.
        num_examples:
          type: integer
+          description: >-
+            (Optional) The number of examples to evaluate. If not provided, all examples
+            in the dataset will be evaluated.
      additionalProperties: false
      required:
        - eval_candidate
        - scoring_params
      title: BenchmarkConfig
+      description: >-
+        A benchmark configuration for evaluation.
    EvalCandidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
@ -4298,16 +4411,22 @@ components:
          default: model
        model:
          type: string
+          description: The model ID to evaluate.
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
+          description: The sampling parameters for the model.
        system_message:
          $ref: '#/components/schemas/SystemMessage'
+          description: >-
+            (Optional) The system message providing instructions or context to the
+            model.
      additionalProperties: false
      required:
        - type
        - model
        - sampling_params
      title: ModelCandidate
+      description: A model candidate for evaluation.
    RegexParserScoringFnParams:
      type: object
      properties:
@ -4353,12 +4472,16 @@ components:
                - type: string
                - type: array
                - type: object
+          description: The rows to evaluate.
        scoring_functions:
          type: array
          items:
            type: string
+          description: >-
+            The scoring functions to use for the evaluation.
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
+          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - input_rows
@ -4380,15 +4503,18 @@ components:
                - type: string
                - type: array
                - type: object
+          description: The generations from the evaluation.
        scores:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
+          description: The scores from the evaluation.
      additionalProperties: false
      required:
        - generations
        - scores
      title: EvaluateResponse
+      description: The response from an evaluation.
    ScoringResult:
      type: object
      properties:
@ -4404,6 +4530,8 @@ components:
                - type: string
                - type: array
                - type: object
+          description: >-
+            The scoring result for each row. Each row is a map of column name to value.
        aggregated_results:
          type: object
          additionalProperties:
@ -4414,11 +4542,13 @@ components:
              - type: string
              - type: array
              - type: object
+          description: Map of metric name to aggregated value.
      additionalProperties: false
      required:
        - score_rows
        - aggregated_results
      title: ScoringResult
+      description: A scoring result containing per-row scores and aggregated results.
    Session:
      type: object
      properties:
@ -4731,15 +4861,19 @@ components:
                - type: string
                - type: array
                - type: object
+          description: The rows in the current page.
        total_count:
          type: integer
+          description: The total number of rows in the dataset.
        next_page_token:
          type: string
+          description: The token to get the next page of rows.
      additionalProperties: false
      required:
        - rows
        - total_count
      title: PaginatedRowsResult
+      description: A paginated list of rows from a dataset.
    ScoringFn:
      type: object
      properties:
@ -6170,6 +6304,7 @@ components:
      properties:
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
+          description: The configuration for the benchmark.
      additionalProperties: false
      required:
        - benchmark_config
@ -6251,12 +6386,15 @@ components:
                - type: string
                - type: array
                - type: object
+          description: The rows to score.
        scoring_functions:
          type: object
          additionalProperties:
            oneOf:
              - $ref: '#/components/schemas/ScoringFnParams'
              - type: 'null'
+          description: >-
+            The scoring functions to use for the scoring.
      additionalProperties: false
      required:
        - input_rows
@ -6269,10 +6407,13 @@ components:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
+          description: >-
+            A map of scoring function name to ScoringResult.
      additionalProperties: false
      required:
        - results
      title: ScoreResponse
+      description: The response from scoring.
    ScoreBatchRequest:
      type: object
      properties:
@ -6543,6 +6684,8 @@ tags:
  - name: DatasetIO
  - name: Datasets
  - name: Eval
+    x-displayName: >-
+      Llama Stack Evaluation API for running evaluations on model and agent candidates.
  - name: Files (Coming Soon)
  - name: Inference
    description: >-