# llama-stack-mirror/docs/static/experimental-llama-stack-spec.yaml

# Version of the OpenAPI Specification this document is written against.
openapi: 3.1.0
# Document metadata. The description is Markdown: keep every line flush with
# the block scalar's indentation, because extra leading spaces are preserved in
# the value and a paragraph indented by four or more spaces renders as a code
# block instead of formatted text.
info:
  title: Llama Stack Specification - Experimental APIs
  description: |-
    This is the specification of the Llama Stack that provides
    a set of endpoints and their corresponding interfaces that are
    tailored to best leverage Llama Models.

    **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before
    becoming stable.
  version: v1
# Base URL(s) that the paths below are resolved against.
# NOTE(review): the host looks like a placeholder rather than a real
# deployment — confirm before pointing a client at it.
servers:
- url: http://any-hosted-llama-stack.com
paths:
  # Datasetio: append rows to the dataset named by the `dataset_id` path
  # segment. The rows travel in a required JSON body (AppendRowsRequest).
  /v1beta/datasetio/append-rows/{dataset_id}:
    post:
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              # Empty schema: no response model is declared, so any JSON value
              # validates.
              schema: {}
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Datasetio
      summary: Append Rows
      description: Append rows to a dataset.
      operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post
      parameters:
      - name: dataset_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: dataset_id'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AppendRowsRequest'
        required: true
  # Datasetio: page through the rows of the dataset named by the `dataset_id`
  # path segment. Paging is controlled by two optional, nullable query
  # parameters (`start_index`, `limit`); the result is a PaginatedResponse.
  /v1beta/datasetio/iterrows/{dataset_id}:
    get:
      responses:
        '200':
          description: A PaginatedResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PaginatedResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          $ref: '#/components/responses/BadRequest400'
          description: Bad Request
        '429':
          $ref: '#/components/responses/TooManyRequests429'
          description: Too Many Requests
        '500':
          $ref: '#/components/responses/InternalServerError500'
          description: Internal Server Error
        default:
          $ref: '#/components/responses/DefaultError'
          description: Default Response
      tags:
      - Datasetio
      summary: Iterrows
      description: |-
        Get a paginated list of rows from a dataset.

        Uses offset-based pagination where:
        - start_index: The starting index (0-based). If None, starts from beginning.
        - limit: Number of items to return. If None or -1, returns all items.

        The response includes:
        - data: List of items for the current page.
        - has_more: Whether there are more items available after this set.
      operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get
      parameters:
      # The per-parameter descriptions repeat the wording of the operation
      # description so that tools which only show parameter docs stay useful.
      - name: limit
        in: query
        required: false
        schema:
          anyOf:
          - type: integer
          - type: 'null'
          title: Limit
        description: Number of items to return. If None or -1, returns all items.
      - name: start_index
        in: query
        required: false
        schema:
          anyOf:
          - type: integer
          - type: 'null'
          title: Start Index
        description: The starting index (0-based). If None, starts from beginning.
      - name: dataset_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: dataset_id'
  # Datasets: list all datasets. The operation declares no parameters and no
  # request body; the result is a ListDatasetsResponse.
  /v1beta/datasets:
    get:
      responses:
        '200':
          description: A ListDatasetsResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListDatasetsResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Datasets
      summary: List Datasets
      description: List all datasets.
      operationId: list_datasets_v1beta_datasets_get
  # Datasets: fetch the single Dataset identified by the `dataset_id` path
  # segment.
  /v1beta/datasets/{dataset_id}:
    get:
      responses:
        '200':
          description: A Dataset.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Dataset'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Datasets
      summary: Get Dataset
      description: Get a dataset by its ID.
      operationId: get_dataset_v1beta_datasets__dataset_id__get
      parameters:
      - name: dataset_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: dataset_id'
  # Benchmarks: list all benchmarks. The operation declares no parameters and
  # no request body; the result is a ListBenchmarksResponse.
  /v1alpha/eval/benchmarks:
    get:
      responses:
        '200':
          description: A ListBenchmarksResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBenchmarksResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Benchmarks
      summary: List Benchmarks
      description: List all benchmarks.
      operationId: list_benchmarks_v1alpha_eval_benchmarks_get
  # Benchmarks: fetch the single Benchmark identified by the `benchmark_id`
  # path segment.
  /v1alpha/eval/benchmarks/{benchmark_id}:
    get:
      responses:
        '200':
          description: A Benchmark.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Benchmark'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Benchmarks
      summary: Get Benchmark
      description: Get a benchmark by its ID.
      operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
  # Eval: evaluate caller-supplied rows against the benchmark named by
  # `benchmark_id`. The rows travel in a required JSON body
  # (EvaluateRowsRequest) and the 200 response carries an EvaluateResponse
  # directly, in contrast to the sibling `/jobs` path which returns a Job.
  /v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
    post:
      responses:
        '200':
          description: EvaluateResponse object containing generations and scores.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvaluateResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Eval
      summary: Evaluate Rows
      description: Evaluate a list of rows on a benchmark.
      operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluateRowsRequest'
        required: true
  # Eval: start an evaluation run on the benchmark named by `benchmark_id`.
  # The required JSON body is a BenchmarkConfig; the 200 response is the Job
  # that was created, not the evaluation result itself.
  /v1alpha/eval/benchmarks/{benchmark_id}/jobs:
    post:
      responses:
        '200':
          description: The job that was created to run the evaluation.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Eval
      summary: Run Eval
      description: Run an evaluation on a benchmark.
      operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/BenchmarkConfig'
        required: true
  # Eval: a single evaluation job, addressed by both `benchmark_id` and
  # `job_id`. GET reads the job's status; DELETE cancels the job.
  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
    # Read the job's status; the 200 body is a Job.
    get:
      responses:
        '200':
          description: The status of the evaluation job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Job'
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Eval
      summary: Job Status
      description: Get the status of a job.
      operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
      - name: job_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: job_id'
    # Cancel the job. Takes the same two path parameters and no request body.
    delete:
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              # Empty schema: no response model is declared, so any JSON value
              # validates.
              schema: {}
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Eval
      summary: Job Cancel
      description: Cancel a job.
      operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
      - name: job_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: job_id'
  # Eval: fetch the result of the job addressed by `benchmark_id` and
  # `job_id`. The 200 body is an EvaluateResponse, the same schema that the
  # `/evaluations` path returns.
  /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
    get:
      responses:
        '200':
          description: The result of the job.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/EvaluateResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Eval
      summary: Job Result
      description: Get the result of a job.
      operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get
      parameters:
      - name: benchmark_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: benchmark_id'
      - name: job_id
        in: path
        required: true
        schema:
          type: string
        description: 'Path parameter: job_id'
  # Inference: rerank documents by relevance to a query. The input is a
  # required JSON body (RerankRequest); there are no path or query parameters.
  /v1alpha/inference/rerank:
    post:
      responses:
        '200':
          description: RerankResponse with indices sorted by relevance score (descending).
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/RerankResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Inference
      summary: Rerank
      description: Rerank a list of documents based on their relevance to a query.
      operationId: rerank_v1alpha_inference_rerank_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RerankRequest'
        required: true
  # Post Training: fetch the artifacts of a training job. The job is selected
  # by the required `job_uuid` query parameter, not by a path segment.
  /v1alpha/post-training/job/artifacts:
    get:
      responses:
        '200':
          description: A PostTrainingJobArtifactsResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          $ref: '#/components/responses/BadRequest400'
          description: Bad Request
        '429':
          $ref: '#/components/responses/TooManyRequests429'
          description: Too Many Requests
        '500':
          $ref: '#/components/responses/InternalServerError500'
          description: Internal Server Error
        default:
          $ref: '#/components/responses/DefaultError'
          description: Default Response
      tags:
      - Post Training
      summary: Get Training Job Artifacts
      description: Get the artifacts of a training job.
      operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get
      parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          type: string
          title: Job Uuid
  # Post Training: cancel a training job. The input is a required JSON body
  # (CancelTrainingJobRequest); there are no path or query parameters.
  /v1alpha/post-training/job/cancel:
    post:
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              # Empty schema: no response model is declared, so any JSON value
              # validates.
              schema: {}
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Post Training
      summary: Cancel Training Job
      description: Cancel a training job.
      operationId: cancel_training_job_v1alpha_post_training_job_cancel_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CancelTrainingJobRequest'
        required: true
  # Post Training: fetch the status of a training job. The job is selected by
  # the required `job_uuid` query parameter, not by a path segment.
  /v1alpha/post-training/job/status:
    get:
      responses:
        '200':
          description: A PostTrainingJobStatusResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PostTrainingJobStatusResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          $ref: '#/components/responses/BadRequest400'
          description: Bad Request
        '429':
          $ref: '#/components/responses/TooManyRequests429'
          description: Too Many Requests
        '500':
          $ref: '#/components/responses/InternalServerError500'
          description: Internal Server Error
        default:
          $ref: '#/components/responses/DefaultError'
          description: Default Response
      tags:
      - Post Training
      summary: Get Training Job Status
      description: Get the status of a training job.
      operationId: get_training_job_status_v1alpha_post_training_job_status_get
      parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          type: string
          title: Job Uuid
  # Post Training: list all training jobs. The operation declares no
  # parameters and no request body; the result is a
  # ListPostTrainingJobsResponse.
  /v1alpha/post-training/jobs:
    get:
      responses:
        '200':
          description: A ListPostTrainingJobsResponse.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListPostTrainingJobsResponse'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Post Training
      summary: Get Training Jobs
      description: Get all training jobs.
      operationId: get_training_jobs_v1alpha_post_training_jobs_get
  # Post Training: run preference optimization of a model. The input is a
  # required JSON body (PreferenceOptimizeRequest); the 200 response is a
  # PostTrainingJob.
  /v1alpha/post-training/preference-optimize:
    post:
      responses:
        '200':
          description: A PostTrainingJob.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PostTrainingJob'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Post Training
      summary: Preference Optimize
      description: Run preference optimization of a model.
      operationId: preference_optimize_v1alpha_post_training_preference_optimize_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PreferenceOptimizeRequest'
        required: true
  # Post Training: run supervised fine-tuning of a model. The input is a
  # required JSON body (SupervisedFineTuneRequest); the 200 response is a
  # PostTrainingJob.
  /v1alpha/post-training/supervised-fine-tune:
    post:
      responses:
        '200':
          description: A PostTrainingJob.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PostTrainingJob'
        # Error responses point at shared response objects via $ref.
        '400':
          description: Bad Request
          $ref: '#/components/responses/BadRequest400'
        '429':
          description: Too Many Requests
          $ref: '#/components/responses/TooManyRequests429'
        '500':
          description: Internal Server Error
          $ref: '#/components/responses/InternalServerError500'
        default:
          description: Default Response
          $ref: '#/components/responses/DefaultError'
      tags:
      - Post Training
      summary: Supervised Fine Tune
      description: Run supervised fine-tuning of a model.
      operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SupervisedFineTuneRequest'
        required: true
components:
  schemas:
    # Error payload shape. `status`, `title` and `detail` are always present;
    # `instance` is optional and may also be sent as an explicit null.
    Error:
      description: Error response from the API. Roughly follows RFC 7807.
      properties:
        status:
          title: Status
          type: integer
        title:
          title: Title
          type: string
        detail:
          title: Detail
          type: string
        instance:
          # Nullability is expressed by the 'null' branch, which is the
          # OpenAPI 3.1 / JSON Schema form; the 3.0-only `nullable` flag is
          # not a keyword in 3.1 and must not be used alongside it.
          anyOf:
          - type: string
          - type: 'null'
      required:
      - status
      - title
      - detail
      title: Error
      type: object
    # Image content part. `type` is a fixed tag: it is constrained to the
    # single value `image_url` and defaults to it, so only `image_url` (the
    # OpenAIImageURL object) is listed as required.
    OpenAIChatCompletionContentPartImageParam:
      properties:
        type:
          type: string
          const: image_url
          title: Type
          default: image_url
        image_url:
          $ref: '#/components/schemas/OpenAIImageURL'
      type: object
      required:
      - image_url
      title: OpenAIChatCompletionContentPartImageParam
      description: Image content part for OpenAI-compatible chat completion messages.
    # Text content part. `type` is a fixed tag: it is constrained to the
    # single value `text` and defaults to it, so only the `text` string is
    # listed as required.
    OpenAIChatCompletionContentPartTextParam:
      properties:
        type:
          type: string
          const: text
          title: Type
          default: text
        text:
          type: string
          title: Text
      type: object
      required:
      - text
      title: OpenAIChatCompletionContentPartTextParam
      description: Text content part for OpenAI-compatible chat completion messages.
    # Image reference: a required `url` string plus an optional `detail` that
    # may be a string or null.
    # NOTE(review): the set of accepted `detail` values is not constrained
    # here — confirm against the server implementation.
    OpenAIImageURL:
      properties:
        url:
          type: string
          title: Url
        detail:
          anyOf:
          - type: string
          - type: 'null'
      type: object
      required:
      - url
      title: OpenAIImageURL
      description: Image URL specification for OpenAI-compatible chat completion messages.
    # Closed string enum of aggregation function names. It is referenced as
    # the item type of `aggregation_functions` in the scoring-params schemas.
    AggregationFunctionType:
      type: string
      enum:
      - average
      - weighted_average
      - median
      - categorical_count
      - accuracy
      title: AggregationFunctionType
      description: Types of aggregation functions for scoring results.
    # Scoring parameters tagged `type: basic`. No property is required: `type`
    # has a default and `aggregation_functions` is optional.
    BasicScoringFnParams:
      properties:
        type:
          type: string
          const: basic
          title: Type
          default: basic
        aggregation_functions:
          items:
            $ref: '#/components/schemas/AggregationFunctionType'
          type: array
          title: Aggregation Functions
          description: Aggregation functions to apply to the scores of each row
      type: object
      title: BasicScoringFnParams
      description: Parameters for basic scoring function configuration.
    # Scoring parameters tagged `type: llm_as_judge`. Only `judge_model` is
    # required; the prompt template (string or null), the score-extraction
    # regexes and the aggregation functions are all optional.
    LLMAsJudgeScoringFnParams:
      properties:
        type:
          type: string
          const: llm_as_judge
          title: Type
          default: llm_as_judge
        judge_model:
          type: string
          title: Judge Model
        prompt_template:
          anyOf:
          - type: string
          - type: 'null'
        judge_score_regexes:
          items:
            type: string
          type: array
          title: Judge Score Regexes
          description: Regexes to extract the answer from generated response
        aggregation_functions:
          items:
            $ref: '#/components/schemas/AggregationFunctionType'
          type: array
          title: Aggregation Functions
          description: Aggregation functions to apply to the scores of each row
      type: object
      required:
      - judge_model
      title: LLMAsJudgeScoringFnParams
      description: Parameters for LLM-as-judge scoring function configuration.
    # Scoring parameters tagged `type: regex_parser`. No property is required:
    # `type` has a default, and both the regex list and the aggregation
    # functions are optional.
    RegexParserScoringFnParams:
      properties:
        type:
          type: string
          const: regex_parser
          title: Type
          default: regex_parser
        parsing_regexes:
          items:
            type: string
          type: array
          title: Parsing Regexes
          description: Regex to extract the answer from generated response
        aggregation_functions:
          items:
            $ref: '#/components/schemas/AggregationFunctionType'
          type: array
          title: Aggregation Functions
          description: Aggregation functions to apply to the scores of each row
      type: object
      title: RegexParserScoringFnParams
      description: Parameters for regex parser scoring function configuration.
    # Scoring output: `score_rows` is an array of free-form objects and
    # `aggregated_results` is a single free-form object; both are required.
    # NOTE(review): the description says "for a single row", yet the schema
    # holds an array of rows plus aggregates — the wording looks inaccurate;
    # confirm against the source model before relying on it.
    ScoringResult:
      properties:
        score_rows:
          items:
            additionalProperties: true
            type: object
          type: array
          title: Score Rows
        aggregated_results:
          additionalProperties: true
          type: object
          title: Aggregated Results
      type: object
      required:
      - score_rows
      - aggregated_results
      title: ScoringResult
      description: A scoring result for a single row.
    # Text content item. `type` is a fixed tag: it is constrained to the
    # single value `text` and defaults to it, so only the `text` string is
    # listed as required.
    TextContentItem:
      properties:
        type:
          type: string
          const: text
          title: Type
          default: text
        text:
          type: string
          title: Text
      type: object
      required:
      - text
      title: TextContentItem
      description: A text content item
    # Wrapper object holding one required string, `uri`. The schema places no
    # `format` constraint on it, so any string validates.
    URL:
      properties:
        uri:
          type: string
          title: Uri
      type: object
      required:
      - uri
      title: URL
      description: A URL reference to external content.
    # Request body of the append-rows operation: a required `rows` array whose
    # items are free-form objects (any properties allowed).
    AppendRowsRequest:
      properties:
        rows:
          items:
            additionalProperties: true
            type: object
          type: array
          title: Rows
      type: object
      required:
      - rows
      title: AppendRowsRequest
    # One page of results: `data` (array of free-form objects) and `has_more`
    # are required; `url` is optional and may be a string or null.
    # NOTE(review): what `url` points at is not stated in this schema —
    # confirm against the server implementation.
    PaginatedResponse:
      properties:
        data:
          items:
            additionalProperties: true
            type: object
          type: array
          title: Data
        has_more:
          type: boolean
          title: Has More
        url:
          anyOf:
          - type: string
          - type: 'null'
      type: object
      required:
      - data
      - has_more
      title: PaginatedResponse
      description: A generic paginated response that follows a simple format.
    # Dataset resource. Required: `identifier`, `provider_id`, `purpose` and
    # `source`. `type` is a fixed tag (`dataset`) with a default;
    # `provider_resource_id` and `metadata` are optional.
    Dataset:
      properties:
        identifier:
          type: string
          title: Identifier
          description: Unique identifier for this resource in llama stack
        provider_resource_id:
          anyOf:
          - type: string
          - type: 'null'
          description: Unique identifier for this resource in the provider
        provider_id:
          type: string
          title: Provider Id
          description: ID of the provider that owns this resource
        type:
          type: string
          const: dataset
          title: Type
          default: dataset
        purpose:
          $ref: '#/components/schemas/DatasetPurpose'
        source:
          # Exactly one of the two data-source schemas; the nested object's
          # own `type` value (`uri` or `rows`) selects which one applies.
          oneOf:
          - $ref: '#/components/schemas/URIDataSource'
            title: URIDataSource
          - $ref: '#/components/schemas/RowsDataSource'
            title: RowsDataSource
          title: URIDataSource | RowsDataSource
          discriminator:
            propertyName: type
            mapping:
              rows: '#/components/schemas/RowsDataSource'
              uri: '#/components/schemas/URIDataSource'
        metadata:
          additionalProperties: true
          type: object
          title: Metadata
          description: Any additional metadata for this dataset
      type: object
      required:
      - identifier
      - provider_id
      - purpose
      - source
      title: Dataset
      description: Dataset resource for storing and accessing training or evaluation data.
    # Inline data source tagged `type: rows`: the rows themselves are carried
    # in the required `rows` array of free-form objects.
    RowsDataSource:
      properties:
        type:
          type: string
          const: rows
          title: Type
          default: rows
        rows:
          items:
            additionalProperties: true
            type: object
          type: array
          title: Rows
      type: object
      required:
      - rows
      title: RowsDataSource
      description: A dataset stored in rows.
    # Data source tagged `type: uri`: the data is referenced by the required
    # `uri` string. The schema places no `format` constraint on it.
    URIDataSource:
      properties:
        type:
          type: string
          const: uri
          title: Type
          default: uri
        uri:
          type: string
          title: Uri
      type: object
      required:
      - uri
      title: URIDataSource
      description: A dataset that can be obtained from a URI.
    # Envelope for the dataset listing: a required `data` array of Dataset
    # objects. No pagination fields are declared.
    ListDatasetsResponse:
      properties:
        data:
          items:
            $ref: '#/components/schemas/Dataset'
          type: array
          title: Data
      type: object
      required:
      - data
      title: ListDatasetsResponse
      description: Response from listing datasets.
    # Benchmark resource. Required: `identifier`, `provider_id`, `dataset_id`
    # and `scoring_functions` (an array of strings). `type` is a fixed tag
    # (`benchmark`) with a default; `provider_resource_id` and `metadata` are
    # optional.
    Benchmark:
      properties:
        identifier:
          type: string
          title: Identifier
          description: Unique identifier for this resource in llama stack
        provider_resource_id:
          anyOf:
          - type: string
          - type: 'null'
          description: Unique identifier for this resource in the provider
        provider_id:
          type: string
          title: Provider Id
          description: ID of the provider that owns this resource
        type:
          type: string
          const: benchmark
          title: Type
          default: benchmark
        dataset_id:
          type: string
          title: Dataset Id
        scoring_functions:
          items:
            type: string
          type: array
          title: Scoring Functions
        metadata:
          additionalProperties: true
          type: object
          title: Metadata
          description: Metadata for this evaluation task
      type: object
      required:
      - identifier
      - provider_id
      - dataset_id
      - scoring_functions
      title: Benchmark
      description: A benchmark resource for evaluating model performance.
    # Envelope for the benchmark listing: a required `data` array of Benchmark
    # objects. No pagination fields are declared.
    ListBenchmarksResponse:
      properties:
        data:
          items:
            $ref: '#/components/schemas/Benchmark'
          type: array
          title: Data
      type: object
      required:
      - data
      title: ListBenchmarksResponse
      description: Response from listing benchmarks.
    # Configuration for an evaluation run. Only `eval_candidate` is required;
    # `scoring_params` and `num_examples` are optional.
    BenchmarkConfig:
      properties:
        eval_candidate:
          $ref: '#/components/schemas/ModelCandidate'
        scoring_params:
          # Free-keyed map: each value is exactly one of the three
          # scoring-params schemas, selected by that value's `type` property.
          additionalProperties:
            oneOf:
            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
              title: LLMAsJudgeScoringFnParams
            - $ref: '#/components/schemas/RegexParserScoringFnParams'
              title: RegexParserScoringFnParams
            - $ref: '#/components/schemas/BasicScoringFnParams'
              title: BasicScoringFnParams
            discriminator:
              propertyName: type
              mapping:
                basic: '#/components/schemas/BasicScoringFnParams'
                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
          type: object
          title: Scoring Params
          description: Map between scoring function id and parameters for each scoring function you want to run
        num_examples:
          anyOf:
          - type: integer
          - type: 'null'
          description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated
      type: object
      required:
      - eval_candidate
      title: BenchmarkConfig
      description: A benchmark configuration for evaluation.
    # Sampling-strategy variant identified by the constant `type: greedy`;
    # it declares no other properties and nothing is required.
    GreedySamplingStrategy:
      title: GreedySamplingStrategy
      description: Greedy sampling strategy that selects the highest probability token at each step.
      type: object
      properties:
        type:
          title: Type
          type: string
          const: greedy
          default: greedy
    # Evaluation candidate tagged `type: model`: a model name plus sampling
    # parameters (both required) and an optional, nullable system message.
    ModelCandidate:
      title: ModelCandidate
      description: A model candidate for evaluation.
      type: object
      required:
      - model
      - sampling_params
      properties:
        type:
          title: Type
          type: string
          const: model
          default: model
        model:
          title: Model
          type: string
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        system_message:
          title: SystemMessage
          anyOf:
          - $ref: '#/components/schemas/SystemMessage'
            title: SystemMessage
          - type: 'null'
    # Decoding controls: a discriminated sampling strategy plus nullable token
    # limit, repetition penalty and stop sequences. No property is required.
    SamplingParams:
      title: SamplingParams
      description: Sampling parameters.
      type: object
      properties:
        strategy:
          title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy
          # The payload's `type` value picks which strategy schema applies.
          discriminator:
            propertyName: type
            mapping:
              greedy: '#/components/schemas/GreedySamplingStrategy'
              top_k: '#/components/schemas/TopKSamplingStrategy'
              top_p: '#/components/schemas/TopPSamplingStrategy'
          oneOf:
          - $ref: '#/components/schemas/GreedySamplingStrategy'
            title: GreedySamplingStrategy
          - $ref: '#/components/schemas/TopPSamplingStrategy'
            title: TopPSamplingStrategy
          - $ref: '#/components/schemas/TopKSamplingStrategy'
            title: TopKSamplingStrategy
        max_tokens:
          anyOf:
          - type: integer
          - type: 'null'
        repetition_penalty:
          default: 1.0
          anyOf:
          - type: number
          - type: 'null'
        stop:
          anyOf:
          - type: array
            items:
              type: string
          - type: 'null'
    # Message with the constant role `system`. The required `content` accepts a
    # plain string, a single image/text content item, or an array of such items;
    # content items are discriminated by their `type` field.
    SystemMessage:
      title: SystemMessage
      description: A system message providing instructions or context to the model.
      type: object
      required:
      - content
      properties:
        role:
          title: Role
          type: string
          const: system
          default: system
        content:
          title: string | list[ImageContentItem-Input | TextContentItem]
          anyOf:
          - type: string
          - title: ImageContentItem-Input | TextContentItem
            discriminator:
              propertyName: type
              mapping:
                image: '#/components/schemas/ImageContentItem-Input'
                text: '#/components/schemas/TextContentItem'
            oneOf:
            - $ref: '#/components/schemas/ImageContentItem-Input'
              title: ImageContentItem-Input
            - $ref: '#/components/schemas/TextContentItem'
              title: TextContentItem
          - title: list[ImageContentItem-Input | TextContentItem]
            type: array
            items:
              title: ImageContentItem-Input | TextContentItem
              discriminator:
                propertyName: type
                mapping:
                  image: '#/components/schemas/ImageContentItem-Input'
                  text: '#/components/schemas/TextContentItem'
              oneOf:
              - $ref: '#/components/schemas/ImageContentItem-Input'
                title: ImageContentItem-Input
              - $ref: '#/components/schemas/TextContentItem'
                title: TextContentItem
    # Sampling-strategy variant identified by the constant `type: top_k`.
    # `top_k` is required and must be an integer of at least 1.
    TopKSamplingStrategy:
      properties:
        type:
          type: string
          const: top_k
          title: Type
          default: top_k
        top_k:
          type: integer
          # Bound written as an integer literal to match the declared integer type.
          minimum: 1
          title: Top K
      type: object
      required:
      - top_k
      title: TopKSamplingStrategy
      description: Top-k sampling strategy that restricts sampling to the k most likely tokens.
    # Sampling-strategy variant identified by the constant `type: top_p`.
    # `temperature` must be present in the payload, although the schema also
    # permits null for it; `top_p` defaults to 0.95.
    TopPSamplingStrategy:
      title: TopPSamplingStrategy
      description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
      type: object
      required:
      - temperature
      properties:
        type:
          title: Type
          type: string
          const: top_p
          default: top_p
        temperature:
          anyOf:
          - type: number
            minimum: 0.0
          - type: 'null'
        top_p:
          default: 0.95
          anyOf:
          - type: number
          - type: 'null'
    # Request body carrying free-form input rows, the scoring function ids to
    # apply, and the benchmark configuration. All three fields are required.
    EvaluateRowsRequest:
      title: EvaluateRowsRequest
      type: object
      required:
      - input_rows
      - scoring_functions
      - benchmark_config
      properties:
        input_rows:
          title: Input Rows
          type: array
          items:
            type: object
            additionalProperties: true
        scoring_functions:
          title: Scoring Functions
          type: array
          items:
            type: string
        benchmark_config:
          $ref: '#/components/schemas/BenchmarkConfig'
    # Evaluation output: a list of free-form generation objects and a map from
    # string keys to ScoringResult objects. Both fields are required.
    EvaluateResponse:
      title: EvaluateResponse
      description: The response from an evaluation.
      type: object
      required:
      - generations
      - scores
      properties:
        generations:
          title: Generations
          type: array
          items:
            type: object
            additionalProperties: true
        scores:
          title: Scores
          type: object
          additionalProperties:
            $ref: '#/components/schemas/ScoringResult'
    # Job handle: a string id paired with a JobStatus value, both required.
    Job:
      title: Job
      description: A job execution instance with status tracking.
      type: object
      required:
      - job_id
      - status
      properties:
        job_id:
          title: Job Id
          type: string
        status:
          $ref: '#/components/schemas/JobStatus'
    # Rerank request: a model name, a query, and the list of items to rank.
    # The query and each item may be a plain string, a text content part or an
    # image content part. `max_num_results` is optional and nullable.
    RerankRequest:
      title: RerankRequest
      type: object
      required:
      - model
      - query
      - items
      properties:
        model:
          title: Model
          type: string
        query:
          title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam
          anyOf:
          - type: string
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
            title: OpenAIChatCompletionContentPartTextParam
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
            title: OpenAIChatCompletionContentPartImageParam
        # `items` here is a property name; the nested `items` is the array keyword.
        items:
          title: Items
          type: array
          items:
            title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam
            anyOf:
            - type: string
            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
              title: OpenAIChatCompletionContentPartTextParam
            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
              title: OpenAIChatCompletionContentPartImageParam
        max_num_results:
          anyOf:
          - type: integer
          - type: 'null'
    # One rerank result: an integer index and a numeric relevance score,
    # both required.
    RerankData:
      title: RerankData
      description: A single rerank result from a reranking response.
      type: object
      required:
      - index
      - relevance_score
      properties:
        index:
          title: Index
          type: integer
        relevance_score:
          title: Relevance Score
          type: number
    # Response envelope whose required `data` field is an array of RerankData.
    RerankResponse:
      title: RerankResponse
      description: Response from a reranking request.
      type: object
      required:
      - data
      properties:
        data:
          title: Data
          type: array
          items:
            $ref: '#/components/schemas/RerankData'
    # A saved training checkpoint. Every field except the nullable
    # `training_metrics` is required.
    Checkpoint:
      title: Checkpoint
      description: Checkpoint created during training runs.
      type: object
      required:
      - identifier
      - created_at
      - epoch
      - post_training_job_id
      - path
      properties:
        identifier:
          title: Identifier
          type: string
        created_at:
          title: Created At
          type: string
          format: date-time
        epoch:
          title: Epoch
          type: integer
        post_training_job_id:
          title: Post Training Job Id
          type: string
        path:
          title: Path
          type: string
        training_metrics:
          title: PostTrainingMetric
          anyOf:
          - $ref: '#/components/schemas/PostTrainingMetric'
            title: PostTrainingMetric
          - type: 'null'
    # Artifacts for one job: the required job UUID and an optional array of
    # Checkpoint objects.
    PostTrainingJobArtifactsResponse:
      title: PostTrainingJobArtifactsResponse
      description: Artifacts of a finetuning job.
      type: object
      required:
      - job_uuid
      properties:
        job_uuid:
          title: Job Uuid
          type: string
        checkpoints:
          title: Checkpoints
          type: array
          items:
            $ref: '#/components/schemas/Checkpoint'
    # Metrics for one epoch: the integer epoch plus three numeric values
    # (train loss, validation loss, perplexity). All four are required.
    PostTrainingMetric:
      title: PostTrainingMetric
      description: Training metrics captured during post-training jobs.
      type: object
      required:
      - epoch
      - train_loss
      - validation_loss
      - perplexity
      properties:
        epoch:
          title: Epoch
          type: integer
        train_loss:
          title: Train Loss
          type: number
        validation_loss:
          title: Validation Loss
          type: number
        perplexity:
          title: Perplexity
          type: number
    # Request body consisting solely of the required `job_uuid` string.
    CancelTrainingJobRequest:
      title: CancelTrainingJobRequest
      type: object
      required:
      - job_uuid
      properties:
        job_uuid:
          title: Job Uuid
          type: string
    # Job status report. Only `job_uuid` and `status` are required; the three
    # timestamps and `resources_allocated` are nullable, and `checkpoints` is an
    # optional array of Checkpoint objects.
    PostTrainingJobStatusResponse:
      title: PostTrainingJobStatusResponse
      description: Status of a finetuning job.
      type: object
      required:
      - job_uuid
      - status
      properties:
        job_uuid:
          title: Job Uuid
          type: string
        status:
          $ref: '#/components/schemas/JobStatus'
        scheduled_at:
          anyOf:
          - format: date-time
            type: string
          - type: 'null'
        started_at:
          anyOf:
          - format: date-time
            type: string
          - type: 'null'
        completed_at:
          anyOf:
          - format: date-time
            type: string
          - type: 'null'
        resources_allocated:
          anyOf:
          - type: object
            additionalProperties: true
          - type: 'null'
        checkpoints:
          title: Checkpoints
          type: array
          items:
            $ref: '#/components/schemas/Checkpoint'
    # Response envelope whose required `data` field is an array of
    # PostTrainingJob objects.
    ListPostTrainingJobsResponse:
      title: ListPostTrainingJobsResponse
      type: object
      required:
      - data
      properties:
        data:
          title: Data
          type: array
          items:
            $ref: '#/components/schemas/PostTrainingJob'
    # DPO settings: a required numeric `beta` and a loss type that defaults to
    # `sigmoid`.
    DPOAlignmentConfig:
      title: DPOAlignmentConfig
      description: Configuration for Direct Preference Optimization (DPO) alignment.
      type: object
      required:
      - beta
      properties:
        beta:
          title: Beta
          type: number
        loss_type:
          default: sigmoid
          $ref: '#/components/schemas/DPOLossType'
    # String enumeration of the four accepted DPO loss type names.
    DPOLossType:
      title: DPOLossType
      type: string
      enum:
      - sigmoid
      - hinge
      - ipo
      - kto_pair
    # Training-data settings. Dataset id, batch size, shuffle flag and data
    # format are required; the validation dataset id is nullable, and `packed`
    # and `train_on_input` are nullable booleans defaulting to false.
    DataConfig:
      title: DataConfig
      description: Configuration for training data and data loading.
      type: object
      required:
      - dataset_id
      - batch_size
      - shuffle
      - data_format
      properties:
        dataset_id:
          title: Dataset Id
          type: string
        batch_size:
          title: Batch Size
          type: integer
        shuffle:
          title: Shuffle
          type: boolean
        data_format:
          $ref: '#/components/schemas/DatasetFormat'
        validation_dataset_id:
          anyOf:
          - type: string
          - type: 'null'
        packed:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
        train_on_input:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
    # String enumeration with two accepted values: `instruct` and `dialog`.
    DatasetFormat:
      title: DatasetFormat
      description: Format of the training dataset.
      type: string
      enum:
      - instruct
      - dialog
    # Four independent nullable boolean switches, each defaulting to false.
    # No property is required.
    EfficiencyConfig:
      title: EfficiencyConfig
      description: Configuration for memory and compute efficiency optimizations.
      type: object
      properties:
        enable_activation_checkpointing:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
        enable_activation_offloading:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
        memory_efficient_fsdp_wrap:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
        fsdp_cpu_offload:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
    # Optimizer settings: optimizer type, learning rate (`lr`), weight decay and
    # warmup step count. All four are required.
    OptimizerConfig:
      title: OptimizerConfig
      description: Configuration parameters for the optimization algorithm.
      type: object
      required:
      - optimizer_type
      - lr
      - weight_decay
      - num_warmup_steps
      properties:
        optimizer_type:
          $ref: '#/components/schemas/OptimizerType'
        lr:
          title: Lr
          type: number
        weight_decay:
          title: Weight Decay
          type: number
        num_warmup_steps:
          title: Num Warmup Steps
          type: integer
    # String enumeration with three accepted values: `adam`, `adamw`, `sgd`.
    OptimizerType:
      title: OptimizerType
      description: Available optimizer algorithms for training.
      type: string
      enum:
      - adam
      - adamw
      - sgd
    # Top-level training settings. Only `n_epochs` is required; the step-count
    # fields default to 1, `dtype` defaults to bf16, and the nested data,
    # optimizer and efficiency configs are each nullable.
    TrainingConfig:
      title: TrainingConfig
      description: Comprehensive configuration for the training process.
      type: object
      required:
      - n_epochs
      properties:
        n_epochs:
          title: N Epochs
          type: integer
        max_steps_per_epoch:
          title: Max Steps Per Epoch
          type: integer
          default: 1
        gradient_accumulation_steps:
          title: Gradient Accumulation Steps
          type: integer
          default: 1
        max_validation_steps:
          default: 1
          anyOf:
          - type: integer
          - type: 'null'
        data_config:
          title: DataConfig
          anyOf:
          - $ref: '#/components/schemas/DataConfig'
            title: DataConfig
          - type: 'null'
        optimizer_config:
          title: OptimizerConfig
          anyOf:
          - $ref: '#/components/schemas/OptimizerConfig'
            title: OptimizerConfig
          - type: 'null'
        efficiency_config:
          title: EfficiencyConfig
          anyOf:
          - $ref: '#/components/schemas/EfficiencyConfig'
            title: EfficiencyConfig
          - type: 'null'
        dtype:
          default: bf16
          anyOf:
          - type: string
          - type: 'null'
    # Request body for a preference-optimization job. All six fields are
    # required; the hyperparameter-search and logger configs are free-form
    # objects.
    PreferenceOptimizeRequest:
      title: PreferenceOptimizeRequest
      type: object
      required:
      - job_uuid
      - finetuned_model
      - algorithm_config
      - training_config
      - hyperparam_search_config
      - logger_config
      properties:
        job_uuid:
          title: Job Uuid
          type: string
        finetuned_model:
          title: Finetuned Model
          type: string
        algorithm_config:
          $ref: '#/components/schemas/DPOAlignmentConfig'
        training_config:
          $ref: '#/components/schemas/TrainingConfig'
        hyperparam_search_config:
          title: Hyperparam Search Config
          type: object
          additionalProperties: true
        logger_config:
          title: Logger Config
          type: object
          additionalProperties: true
    # Job reference consisting solely of the required `job_uuid` string.
    PostTrainingJob:
      title: PostTrainingJob
      type: object
      required:
      - job_uuid
      properties:
        job_uuid:
          title: Job Uuid
          type: string
    # Fine-tuning variant identified by the constant `type: LoRA`. The attention
    # module list, the two apply flags, rank and alpha are required; `use_dora`
    # and `quantize_base` are nullable booleans defaulting to false.
    LoraFinetuningConfig:
      title: LoraFinetuningConfig
      description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
      type: object
      required:
      - lora_attn_modules
      - apply_lora_to_mlp
      - apply_lora_to_output
      - rank
      - alpha
      properties:
        type:
          title: Type
          type: string
          const: LoRA
          default: LoRA
        lora_attn_modules:
          title: Lora Attn Modules
          type: array
          items:
            type: string
        apply_lora_to_mlp:
          title: Apply Lora To Mlp
          type: boolean
        apply_lora_to_output:
          title: Apply Lora To Output
          type: boolean
        rank:
          title: Rank
          type: integer
        alpha:
          title: Alpha
          type: integer
        use_dora:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
        quantize_base:
          default: false
          anyOf:
          - type: boolean
          - type: 'null'
    # Fine-tuning variant identified by the constant `type: QAT`; the quantizer
    # name and group size are both required.
    QATFinetuningConfig:
      title: QATFinetuningConfig
      description: Configuration for Quantization-Aware Training (QAT) fine-tuning.
      type: object
      required:
      - quantizer_name
      - group_size
      properties:
        type:
          title: Type
          type: string
          const: QAT
          default: QAT
        quantizer_name:
          title: Quantizer Name
          type: string
        group_size:
          title: Group Size
          type: integer
    # Request body for a supervised fine-tuning job. The job UUID, training
    # config, hyperparameter-search config and logger config are required;
    # `model`, `checkpoint_dir` and `algorithm_config` are nullable.
    SupervisedFineTuneRequest:
      properties:
        job_uuid:
          type: string
          title: Job Uuid
        training_config:
          $ref: '#/components/schemas/TrainingConfig'
        hyperparam_search_config:
          additionalProperties: true
          type: object
          title: Hyperparam Search Config
        logger_config:
          additionalProperties: true
          type: object
          title: Logger Config
        model:
          anyOf:
          - type: string
          - type: 'null'
          description: Model descriptor for training if not in provider config
        checkpoint_dir:
          anyOf:
          - type: string
          - type: 'null'
        algorithm_config:
          anyOf:
          # When present, the config is either LoRA or QAT, selected by its
          # `type` field.
          - oneOf:
            - $ref: '#/components/schemas/LoraFinetuningConfig'
              title: LoraFinetuningConfig
            - $ref: '#/components/schemas/QATFinetuningConfig'
              title: QATFinetuningConfig
            discriminator:
              propertyName: type
              mapping:
                LoRA: '#/components/schemas/LoraFinetuningConfig'
                QAT: '#/components/schemas/QATFinetuningConfig'
            title: LoraFinetuningConfig | QATFinetuningConfig
          - type: 'null'
          title: Algorithm Config
      type: object
      required:
      - job_uuid
      - training_config
      - hyperparam_search_config
      - logger_config
      title: SupervisedFineTuneRequest
    # String enumeration of the three accepted dataset purpose identifiers.
    DatasetPurpose:
      title: DatasetPurpose
      description: Purpose of the dataset. Each purpose has a required input data schema.
      type: string
      enum:
      - post-training/messages
      - eval/question-answer
      - eval/messages-answer
    # Content item identified by the constant `type: image`; the required
    # `image` field holds a _URLOrData object. The schema key carries an
    # `-Input` suffix while its title is plain `ImageContentItem`.
    ImageContentItem-Input:
      properties:
        type:
          type: string
          const: image
          title: Type
          default: image
        image:
          $ref: '#/components/schemas/_URLOrData'
      type: object
      required:
      - image
      title: ImageContentItem
      description: An image content item
    # String enumeration of the five accepted job states.
    JobStatus:
      title: JobStatus
      description: Status of a job execution.
      type: string
      enum:
      - completed
      - in_progress
      - failed
      - scheduled
      - cancelled
    # Holder for either a URL reference or inline string data annotated as
    # base64-encoded. Both fields are nullable and neither is required by the
    # schema.
    _URLOrData:
      title: _URLOrData
      description: A URL or a base64 encoded string
      type: object
      properties:
        url:
          title: URL
          anyOf:
          - $ref: '#/components/schemas/URL'
            title: URL
          - type: 'null'
        data:
          contentEncoding: base64
          anyOf:
          - type: string
          - type: 'null'
  # Reusable error responses that operations reference through
  # `#/components/responses/...`. Each one returns the shared Error schema as
  # JSON; all but DefaultError also carry an example payload.
  responses:
    BadRequest400:
      description: The request was invalid or malformed
      content:
        application/json:
          example:
            status: 400
            title: Bad Request
            detail: The request was invalid or malformed
          schema:
            $ref: '#/components/schemas/Error'
    TooManyRequests429:
      description: The client has sent too many requests in a given amount of time
      content:
        application/json:
          example:
            status: 429
            title: Too Many Requests
            detail: You have exceeded the rate limit. Please try again later.
          schema:
            $ref: '#/components/schemas/Error'
    InternalServerError500:
      description: The server encountered an unexpected error
      content:
        application/json:
          example:
            status: 500
            title: Internal Server Error
            detail: An unexpected error occurred
          schema:
            $ref: '#/components/schemas/Error'
    DefaultError:
      description: An error occurred
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/Error'
# Tag declarations. `x-displayName` is a vendor extension holding the short
# label for a tag; entries without it fall back to `name` (presumably — confirm
# against the documentation renderer in use).
tags:
- description: APIs for creating and interacting with agentic systems.
  name: Agents
  x-displayName: Agents
# The overview sentence lives in the description so that the display name
# stays a short label like every other tag's.
- description: |-
    The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.

    The API is designed to allow use of openai client libraries for seamless integration.

    This API provides the following extensions:
     - idempotent batch creation

    Note: This API is currently under active development and may undergo changes.
  name: Batches
  x-displayName: Batches
- description: ''
  name: Benchmarks
- description: Protocol for conversation management operations.
  name: Conversations
  x-displayName: Conversations
# NOTE(review): the append-rows operation under /v1beta/datasetio is tagged
# `Datasetio`, which differs in case from this `DatasetIO` entry — confirm
# which spelling is intended so operations attach to the declared tag.
- description: ''
  name: DatasetIO
- description: ''
  name: Datasets
- description: Llama Stack Evaluation API for running evaluations on model and agent candidates.
  name: Eval
  x-displayName: Evaluations
- description: This API is used to upload documents that can be used with other Llama Stack APIs.
  name: Files
  x-displayName: Files
- description: |-
    Llama Stack Inference API for generating completions, chat completions, and embeddings.

    This API provides the raw interface to the underlying models. Three kinds of models are supported:
    - LLM models: these models generate "raw" and "chat" (conversational) completions.
    - Embedding models: these models generate embeddings to be used for semantic search.
    - Rerank models: these models reorder the documents based on their relevance to a query.
  name: Inference
  x-displayName: Inference
- description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
  name: Inspect
  x-displayName: Inspect
- description: ''
  name: Models
- description: ''
  name: PostTraining (Coming Soon)
- description: Protocol for prompt management operations.
  name: Prompts
  x-displayName: Prompts
- description: Providers API for inspecting, listing, and modifying providers and their configurations.
  name: Providers
  x-displayName: Providers
- description: OpenAI-compatible Moderations API.
  name: Safety
  x-displayName: Safety
- description: ''
  name: Scoring
- description: ''
  name: ScoringFunctions
- description: ''
  name: Shields
- description: ''
  name: ToolGroups
- description: ''
  name: ToolRuntime
- description: ''
  name: VectorIO
# Vendor extension (x- prefix) that places all 21 tags from the `tags` list
# into a single group named Operations. Entries must match tag names exactly.
x-tagGroups:
- name: Operations
  tags:
  - Agents
  - Batches
  - Benchmarks
  - Conversations
  - DatasetIO
  - Datasets
  - Eval
  - Files
  - Inference
  - Inspect
  - Models
  - PostTraining (Coming Soon)
  - Prompts
  - Providers
  - Safety
  - Scoring
  - ScoringFunctions
  - Shields
  - ToolGroups
  - ToolRuntime
  - VectorIO
# Document-level security requirement: the scheme named `Default`, with an
# empty scope list.
# NOTE(review): no `components.securitySchemes` entry named `Default` is
# visible in this part of the file — confirm it is declared elsewhere.
security:
- Default: []