# Source: llama-stack-mirror/docs/static/experimental-llama-stack-spec.yaml
# (1915 lines, 56 KiB, YAML)

# Document header: OpenAPI version, API metadata, and the server list.
openapi: 3.1.0
info:
  title: Llama Stack Specification - Experimental APIs
  version: v1
  description: |-
    This is the specification of the Llama Stack that provides
    a set of endpoints and their corresponding interfaces that are
    tailored to
    best leverage Llama Models.
    **🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before
    becoming stable.
servers:
  - url: http://any-hosted-llama-stack.com
paths:
# POST: append rows to a dataset. The body is an AppendRowsRequest and the 200
# response declares an unconstrained (empty) JSON schema.
/v1beta/datasetio/append-rows/{dataset_id}:
  post:
    operationId: append_rows_v1beta_datasetio_append_rows__dataset_id__post
    summary: Append Rows
    description: Append rows to a dataset.
    tags:
      - Datasetio
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: 'Path parameter: dataset_id'
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AppendRowsRequest'
    responses:
      '200':
        description: Successful Response
        content:
          application/json:
            schema: {}
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: page through the rows of a dataset with offset-based pagination; the
# 200 response is a PaginatedResponse.
/v1beta/datasetio/iterrows/{dataset_id}:
  get:
    operationId: iterrows_v1beta_datasetio_iterrows__dataset_id__get
    summary: Iterrows
    description: |-
      Get a paginated list of rows from a dataset.
      Uses offset-based pagination where:
      - start_index: The starting index (0-based). If None, starts from beginning.
      - limit: Number of items to return. If None or -1, returns all items.
      The response includes:
      - data: List of items for the current page.
      - has_more: Whether there are more items available after this set.
    tags:
      - Datasetio
    parameters:
      # The query parameter descriptions repeat the wording of the operation
      # description so they appear next to each parameter in generated docs.
      - name: limit
        in: query
        required: false
        description: Number of items to return. If None or -1, returns all items.
        schema:
          title: Limit
          anyOf:
            - type: integer
            - type: 'null'
      - name: start_index
        in: query
        required: false
        description: The starting index (0-based). If None, starts from beginning.
        schema:
          title: Start Index
          anyOf:
            - type: integer
            - type: 'null'
      - name: dataset_id
        in: path
        required: true
        description: 'Path parameter: dataset_id'
        schema:
          type: string
    responses:
      '200':
        description: A PaginatedResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PaginatedResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: list all datasets; takes no parameters and returns a
# ListDatasetsResponse.
/v1beta/datasets:
  get:
    operationId: list_datasets_v1beta_datasets_get
    summary: List Datasets
    description: List all datasets.
    tags:
      - Datasets
    responses:
      '200':
        description: A ListDatasetsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListDatasetsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: fetch a single dataset by the ID given in the path; returns a Dataset.
/v1beta/datasets/{dataset_id}:
  get:
    operationId: get_dataset_v1beta_datasets__dataset_id__get
    summary: Get Dataset
    description: Get a dataset by its ID.
    tags:
      - Datasets
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: 'Path parameter: dataset_id'
        schema:
          type: string
    responses:
      '200':
        description: A Dataset.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Dataset'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: list all benchmarks; takes no parameters and returns a
# ListBenchmarksResponse.
/v1alpha/eval/benchmarks:
  get:
    operationId: list_benchmarks_v1alpha_eval_benchmarks_get
    summary: List Benchmarks
    description: List all benchmarks.
    tags:
      - Benchmarks
    responses:
      '200':
        description: A ListBenchmarksResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListBenchmarksResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: fetch a single benchmark by the ID given in the path; returns a
# Benchmark.
/v1alpha/eval/benchmarks/{benchmark_id}:
  get:
    operationId: get_benchmark_v1alpha_eval_benchmarks__benchmark_id__get
    summary: Get Benchmark
    description: Get a benchmark by its ID.
    tags:
      - Benchmarks
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
    responses:
      '200':
        description: A Benchmark.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Benchmark'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: evaluate a list of rows against a benchmark. The body is an
# EvaluateRowsRequest and the 200 response is an EvaluateResponse.
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
  post:
    operationId: evaluate_rows_v1alpha_eval_benchmarks__benchmark_id__evaluations_post
    summary: Evaluate Rows
    description: Evaluate a list of rows on a benchmark.
    tags:
      - Eval
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/EvaluateRowsRequest'
    responses:
      '200':
        description: EvaluateResponse object containing generations and scores.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: start an evaluation run on a benchmark. The body is a BenchmarkConfig
# and the 200 response is the created Job.
/v1alpha/eval/benchmarks/{benchmark_id}/jobs:
  post:
    operationId: run_eval_v1alpha_eval_benchmarks__benchmark_id__jobs_post
    summary: Run Eval
    description: Run an evaluation on a benchmark.
    tags:
      - Eval
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/BenchmarkConfig'
    responses:
      '200':
        description: The job that was created to run the evaluation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Job'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# Operations on one evaluation job, addressed by benchmark_id and job_id:
# GET returns the Job status, DELETE cancels the job.
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
  get:
    operationId: job_status_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__get
    summary: Job Status
    description: Get the status of a job.
    tags:
      - Eval
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: 'Path parameter: job_id'
        schema:
          type: string
    responses:
      '200':
        description: The status of the evaluation job.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Job'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
  delete:
    operationId: job_cancel_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__delete
    summary: Job Cancel
    description: Cancel a job.
    tags:
      - Eval
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: 'Path parameter: job_id'
        schema:
          type: string
    responses:
      # The success payload is declared with an empty (unconstrained) schema.
      '200':
        description: Successful Response
        content:
          application/json:
            schema: {}
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: fetch the result of an evaluation job, addressed by benchmark_id and
# job_id; the 200 response is an EvaluateResponse.
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
  get:
    operationId: job_result_v1alpha_eval_benchmarks__benchmark_id__jobs__job_id__result_get
    summary: Job Result
    description: Get the result of a job.
    tags:
      - Eval
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: 'Path parameter: benchmark_id'
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: 'Path parameter: job_id'
        schema:
          type: string
    responses:
      '200':
        description: The result of the job.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: rerank documents against a query. The body is a RerankRequest and the
# 200 response is a RerankResponse.
/v1alpha/inference/rerank:
  post:
    operationId: rerank_v1alpha_inference_rerank_post
    summary: Rerank
    description: Rerank a list of documents based on their relevance to a query.
    tags:
      - Inference
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RerankRequest'
    responses:
      '200':
        description: RerankResponse with indices sorted by relevance score (descending).
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RerankResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: fetch the artifacts of a training job. The job is selected by the
# required `job_uuid` query parameter.
/v1alpha/post-training/job/artifacts:
  get:
    operationId: get_training_job_artifacts_v1alpha_post_training_job_artifacts_get
    summary: Get Training Job Artifacts
    description: Get the artifacts of a training job.
    tags:
      - Post Training
    parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          title: Job Uuid
          type: string
    responses:
      '200':
        description: A PostTrainingJobArtifactsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: cancel a training job. The body is a CancelTrainingJobRequest and the
# 200 response declares an unconstrained (empty) JSON schema.
/v1alpha/post-training/job/cancel:
  post:
    operationId: cancel_training_job_v1alpha_post_training_job_cancel_post
    summary: Cancel Training Job
    description: Cancel a training job.
    tags:
      - Post Training
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CancelTrainingJobRequest'
    responses:
      '200':
        description: Successful Response
        content:
          application/json:
            schema: {}
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: fetch the status of a training job. The job is selected by the required
# `job_uuid` query parameter.
/v1alpha/post-training/job/status:
  get:
    operationId: get_training_job_status_v1alpha_post_training_job_status_get
    summary: Get Training Job Status
    description: Get the status of a training job.
    tags:
      - Post Training
    parameters:
      - name: job_uuid
        in: query
        required: true
        schema:
          title: Job Uuid
          type: string
    responses:
      '200':
        description: A PostTrainingJobStatusResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobStatusResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# GET: list all training jobs; takes no parameters and returns a
# ListPostTrainingJobsResponse.
/v1alpha/post-training/jobs:
  get:
    operationId: get_training_jobs_v1alpha_post_training_jobs_get
    summary: Get Training Jobs
    description: Get all training jobs.
    tags:
      - Post Training
    responses:
      '200':
        description: A ListPostTrainingJobsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListPostTrainingJobsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: run preference optimization of a model. The body is a
# PreferenceOptimizeRequest and the 200 response is a PostTrainingJob.
/v1alpha/post-training/preference-optimize:
  post:
    operationId: preference_optimize_v1alpha_post_training_preference_optimize_post
    summary: Preference Optimize
    description: Run preference optimization of a model.
    tags:
      - Post Training
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PreferenceOptimizeRequest'
    responses:
      '200':
        description: A PostTrainingJob.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJob'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
# POST: run supervised fine-tuning of a model. The body is a
# SupervisedFineTuneRequest and the 200 response is a PostTrainingJob.
/v1alpha/post-training/supervised-fine-tune:
  post:
    operationId: supervised_fine_tune_v1alpha_post_training_supervised_fine_tune_post
    summary: Supervised Fine Tune
    description: Run supervised fine-tuning of a model.
    tags:
      - Post Training
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SupervisedFineTuneRequest'
    responses:
      '200':
        description: A PostTrainingJob.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJob'
      '400':
        $ref: '#/components/responses/BadRequest400'
        description: Bad Request
      '429':
        $ref: '#/components/responses/TooManyRequests429'
        description: Too Many Requests
      '500':
        $ref: '#/components/responses/InternalServerError500'
        description: Internal Server Error
      default:
        $ref: '#/components/responses/DefaultError'
        description: Default Response
components:
schemas:
# Error payload schema. `status`, `title` and `detail` are required; `instance`
# is optional and may be null. Nullability is expressed by the anyOf with
# type 'null'; the OpenAPI 3.0 `nullable` keyword is not part of the 3.1 /
# JSON Schema 2020-12 vocabulary and is therefore not used here.
Error:
  title: Error
  description: Error response from the API. Roughly follows RFC 7807.
  type: object
  required:
    - status
    - title
    - detail
  properties:
    status:
      title: Status
      type: integer
    title:
      title: Title
      type: string
    detail:
      title: Detail
      type: string
    instance:
      anyOf:
        - type: string
        - type: 'null'
# Content part tagged `type: image_url`; only `image_url` is required because
# `type` has a default.
OpenAIChatCompletionContentPartImageParam:
  title: OpenAIChatCompletionContentPartImageParam
  description: Image content part for OpenAI-compatible chat completion messages.
  type: object
  required:
    - image_url
  properties:
    type:
      title: Type
      type: string
      const: image_url
      default: image_url
    image_url:
      $ref: '#/components/schemas/OpenAIImageURL'
# Content part tagged `type: text`; only `text` is required because `type` has
# a default.
OpenAIChatCompletionContentPartTextParam:
  title: OpenAIChatCompletionContentPartTextParam
  description: Text content part for OpenAI-compatible chat completion messages.
  type: object
  required:
    - text
  properties:
    type:
      title: Type
      type: string
      const: text
      default: text
    text:
      title: Text
      type: string
# Image reference: a required `url` plus an optional, nullable `detail` string.
OpenAIImageURL:
  title: OpenAIImageURL
  description: Image URL specification for OpenAI-compatible chat completion messages.
  type: object
  required:
    - url
  properties:
    url:
      title: Url
      type: string
    detail:
      anyOf:
        - type: string
        - type: 'null'
# String enum referenced by the `aggregation_functions` arrays of the scoring
# parameter schemas.
AggregationFunctionType:
  title: AggregationFunctionType
  description: Types of aggregation functions for scoring results.
  type: string
  enum:
    - average
    - weighted_average
    - median
    - categorical_count
    - accuracy
# Scoring parameters tagged `type: basic`; no property is required.
BasicScoringFnParams:
  title: BasicScoringFnParams
  description: Parameters for basic scoring function configuration.
  type: object
  properties:
    type:
      title: Type
      type: string
      const: basic
      default: basic
    aggregation_functions:
      title: Aggregation Functions
      description: Aggregation functions to apply to the scores of each row
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
# Scoring parameters tagged `type: llm_as_judge`; `judge_model` is the only
# required property.
LLMAsJudgeScoringFnParams:
  title: LLMAsJudgeScoringFnParams
  description: Parameters for LLM-as-judge scoring function configuration.
  type: object
  required:
    - judge_model
  properties:
    type:
      title: Type
      type: string
      const: llm_as_judge
      default: llm_as_judge
    judge_model:
      title: Judge Model
      type: string
    prompt_template:
      anyOf:
        - type: string
        - type: 'null'
    judge_score_regexes:
      title: Judge Score Regexes
      description: Regexes to extract the answer from generated response
      type: array
      items:
        type: string
    aggregation_functions:
      title: Aggregation Functions
      description: Aggregation functions to apply to the scores of each row
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
# Scoring parameters tagged `type: regex_parser`; no property is required.
RegexParserScoringFnParams:
  title: RegexParserScoringFnParams
  description: Parameters for regex parser scoring function configuration.
  type: object
  properties:
    type:
      title: Type
      type: string
      const: regex_parser
      default: regex_parser
    parsing_regexes:
      title: Parsing Regexes
      description: Regex to extract the answer from generated response
      type: array
      items:
        type: string
    aggregation_functions:
      title: Aggregation Functions
      description: Aggregation functions to apply to the scores of each row
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
# Pairs a list of free-form per-row score objects with a free-form object of
# aggregated results; both are required.
ScoringResult:
  title: ScoringResult
  description: A scoring result for a single row.
  type: object
  required:
    - score_rows
    - aggregated_results
  properties:
    score_rows:
      title: Score Rows
      type: array
      items:
        type: object
        additionalProperties: true
    aggregated_results:
      title: Aggregated Results
      type: object
      additionalProperties: true
# Content item tagged `type: text`; `text` is the only required property.
TextContentItem:
  title: TextContentItem
  description: A text content item
  type: object
  required:
    - text
  properties:
    type:
      title: Type
      type: string
      const: text
      default: text
    text:
      title: Text
      type: string
# Wrapper object holding a single required `uri` string.
URL:
  title: URL
  description: A URL reference to external content.
  type: object
  required:
    - uri
  properties:
    uri:
      title: Uri
      type: string
# Request body with a required `rows` array; each row is a free-form object.
AppendRowsRequest:
  title: AppendRowsRequest
  type: object
  required:
    - rows
  properties:
    rows:
      title: Rows
      type: array
      items:
        type: object
        additionalProperties: true
# One page of results: required `data` (free-form objects) and `has_more`,
# plus an optional, nullable `url` string.
PaginatedResponse:
  title: PaginatedResponse
  description: A generic paginated response that follows a simple format.
  type: object
  required:
    - data
    - has_more
  properties:
    data:
      title: Data
      type: array
      items:
        type: object
        additionalProperties: true
    has_more:
      title: Has More
      type: boolean
    url:
      anyOf:
        - type: string
        - type: 'null'
# Dataset resource tagged `type: dataset`. `source` is a oneOf over
# URIDataSource and RowsDataSource, discriminated by its own `type` property.
Dataset:
  title: Dataset
  description: Dataset resource for storing and accessing training or evaluation data.
  type: object
  required:
    - identifier
    - provider_id
    - purpose
    - source
  properties:
    identifier:
      title: Identifier
      description: Unique identifier for this resource in llama stack
      type: string
    provider_resource_id:
      description: Unique identifier for this resource in the provider
      anyOf:
        - type: string
        - type: 'null'
    provider_id:
      title: Provider Id
      description: ID of the provider that owns this resource
      type: string
    type:
      title: Type
      type: string
      const: dataset
      default: dataset
    purpose:
      $ref: '#/components/schemas/DatasetPurpose'
    source:
      title: URIDataSource | RowsDataSource
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
          title: URIDataSource
        - $ref: '#/components/schemas/RowsDataSource'
          title: RowsDataSource
      discriminator:
        propertyName: type
        mapping:
          rows: '#/components/schemas/RowsDataSource'
          uri: '#/components/schemas/URIDataSource'
    metadata:
      title: Metadata
      description: Any additional metadata for this dataset
      type: object
      additionalProperties: true
# Data source tagged `type: rows` that carries the rows inline as free-form
# objects.
RowsDataSource:
  title: RowsDataSource
  description: A dataset stored in rows.
  type: object
  required:
    - rows
  properties:
    type:
      title: Type
      type: string
      const: rows
      default: rows
    rows:
      title: Rows
      type: array
      items:
        type: object
        additionalProperties: true
# Data source tagged `type: uri` that points at the data through a required
# `uri` string.
URIDataSource:
  title: URIDataSource
  description: A dataset that can be obtained from a URI.
  type: object
  required:
    - uri
  properties:
    type:
      title: Type
      type: string
      const: uri
      default: uri
    uri:
      title: Uri
      type: string
# Envelope whose required `data` array holds Dataset objects.
ListDatasetsResponse:
  title: ListDatasetsResponse
  description: Response from listing datasets.
  type: object
  required:
    - data
  properties:
    data:
      title: Data
      type: array
      items:
        $ref: '#/components/schemas/Dataset'
# Benchmark resource tagged `type: benchmark`; it names a dataset and the
# scoring functions to apply by their string IDs.
Benchmark:
  title: Benchmark
  description: A benchmark resource for evaluating model performance.
  type: object
  required:
    - identifier
    - provider_id
    - dataset_id
    - scoring_functions
  properties:
    identifier:
      title: Identifier
      description: Unique identifier for this resource in llama stack
      type: string
    provider_resource_id:
      description: Unique identifier for this resource in the provider
      anyOf:
        - type: string
        - type: 'null'
    provider_id:
      title: Provider Id
      description: ID of the provider that owns this resource
      type: string
    type:
      title: Type
      type: string
      const: benchmark
      default: benchmark
    dataset_id:
      title: Dataset Id
      type: string
    scoring_functions:
      title: Scoring Functions
      type: array
      items:
        type: string
    metadata:
      title: Metadata
      description: Metadata for this evaluation task
      type: object
      additionalProperties: true
# Envelope whose required `data` array holds Benchmark objects.
ListBenchmarksResponse:
  title: ListBenchmarksResponse
  type: object
  required:
    - data
  properties:
    data:
      title: Data
      type: array
      items:
        $ref: '#/components/schemas/Benchmark'
# Evaluation run configuration. `scoring_params` is a map whose values are one
# of the three scoring parameter schemas, discriminated by `type`.
BenchmarkConfig:
  title: BenchmarkConfig
  description: A benchmark configuration for evaluation.
  type: object
  required:
    - eval_candidate
  properties:
    eval_candidate:
      $ref: '#/components/schemas/ModelCandidate'
    scoring_params:
      title: Scoring Params
      description: Map between scoring function id and parameters for each scoring function you want to run
      type: object
      additionalProperties:
        title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
        oneOf:
          - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
            title: LLMAsJudgeScoringFnParams
          - $ref: '#/components/schemas/RegexParserScoringFnParams'
            title: RegexParserScoringFnParams
          - $ref: '#/components/schemas/BasicScoringFnParams'
            title: BasicScoringFnParams
        discriminator:
          propertyName: type
          mapping:
            basic: '#/components/schemas/BasicScoringFnParams'
            llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
            regex_parser: '#/components/schemas/RegexParserScoringFnParams'
    num_examples:
      description: Number of examples to evaluate (useful for testing), if not provided, all examples in the dataset will be evaluated
      anyOf:
        - type: integer
        - type: 'null'
# Sampling strategy tagged `type: greedy`; it has no other properties.
GreedySamplingStrategy:
  title: GreedySamplingStrategy
  description: Greedy sampling strategy that selects the highest probability token at each step.
  type: object
  properties:
    type:
      title: Type
      type: string
      const: greedy
      default: greedy
# Candidate tagged `type: model`: a model name and its sampling parameters are
# required; `system_message` is optional and may be null.
ModelCandidate:
  title: ModelCandidate
  description: A model candidate for evaluation.
  type: object
  required:
    - model
    - sampling_params
  properties:
    type:
      title: Type
      type: string
      const: model
      default: model
    model:
      title: Model
      type: string
    sampling_params:
      $ref: '#/components/schemas/SamplingParams'
    system_message:
      title: SystemMessage
      anyOf:
        - $ref: '#/components/schemas/SystemMessage'
          title: SystemMessage
        - type: 'null'
# Sampling settings; no property is required. `strategy` is a oneOf over the
# greedy, top-p and top-k strategies, discriminated by `type`.
SamplingParams:
  title: SamplingParams
  description: Sampling parameters.
  type: object
  properties:
    strategy:
      title: GreedySamplingStrategy | TopPSamplingStrategy | TopKSamplingStrategy
      oneOf:
        - $ref: '#/components/schemas/GreedySamplingStrategy'
          title: GreedySamplingStrategy
        - $ref: '#/components/schemas/TopPSamplingStrategy'
          title: TopPSamplingStrategy
        - $ref: '#/components/schemas/TopKSamplingStrategy'
          title: TopKSamplingStrategy
      discriminator:
        propertyName: type
        mapping:
          greedy: '#/components/schemas/GreedySamplingStrategy'
          top_k: '#/components/schemas/TopKSamplingStrategy'
          top_p: '#/components/schemas/TopPSamplingStrategy'
    max_tokens:
      anyOf:
        - type: integer
        - type: 'null'
    repetition_penalty:
      anyOf:
        - type: number
        - type: 'null'
      default: 1.0
    stop:
      anyOf:
        - type: array
          items:
            type: string
        - type: 'null'
# Message with `role: system`. `content` accepts a plain string, a single
# image/text content item, or an array of such items; the items are
# discriminated by `type`.
SystemMessage:
  title: SystemMessage
  description: A system message providing instructions or context to the model.
  type: object
  required:
    - content
  properties:
    role:
      title: Role
      type: string
      const: system
      default: system
    content:
      title: string | list[ImageContentItem-Input | TextContentItem]
      anyOf:
        - type: string
        - title: ImageContentItem-Input | TextContentItem
          oneOf:
            - $ref: '#/components/schemas/ImageContentItem-Input'
              title: ImageContentItem-Input
            - $ref: '#/components/schemas/TextContentItem'
              title: TextContentItem
          discriminator:
            propertyName: type
            mapping:
              image: '#/components/schemas/ImageContentItem-Input'
              text: '#/components/schemas/TextContentItem'
        - title: list[ImageContentItem-Input | TextContentItem]
          type: array
          items:
            title: ImageContentItem-Input | TextContentItem
            oneOf:
              - $ref: '#/components/schemas/ImageContentItem-Input'
                title: ImageContentItem-Input
              - $ref: '#/components/schemas/TextContentItem'
                title: TextContentItem
            discriminator:
              propertyName: type
              mapping:
                image: '#/components/schemas/ImageContentItem-Input'
                text: '#/components/schemas/TextContentItem'
# Sampling strategy tagged `type: top_k`; `top_k` is required and must be an
# integer of at least 1. The bound is written as an integer literal to match
# the property's integer type.
TopKSamplingStrategy:
  title: TopKSamplingStrategy
  description: Top-k sampling strategy that restricts sampling to the k most likely tokens.
  type: object
  required:
    - top_k
  properties:
    type:
      title: Type
      type: string
      const: top_k
      default: top_k
    top_k:
      title: Top K
      type: integer
      minimum: 1
# Sampling strategy tagged `type: top_p`. `temperature` is listed as required
# but may be null; `top_p` defaults to 0.95.
TopPSamplingStrategy:
  title: TopPSamplingStrategy
  description: Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
  type: object
  required:
    - temperature
  properties:
    type:
      title: Type
      type: string
      const: top_p
      default: top_p
    temperature:
      anyOf:
        - type: number
          minimum: 0.0
        - type: 'null'
    top_p:
      anyOf:
        - type: number
        - type: 'null'
      default: 0.95
# Request body with three required parts: free-form input rows, scoring
# function IDs, and a BenchmarkConfig.
EvaluateRowsRequest:
  title: EvaluateRowsRequest
  type: object
  required:
    - input_rows
    - scoring_functions
    - benchmark_config
  properties:
    input_rows:
      title: Input Rows
      type: array
      items:
        type: object
        additionalProperties: true
    scoring_functions:
      title: Scoring Functions
      type: array
      items:
        type: string
    benchmark_config:
      $ref: '#/components/schemas/BenchmarkConfig'
# Evaluation output: free-form generation objects plus a map whose values are
# ScoringResult objects.
EvaluateResponse:
  title: EvaluateResponse
  description: The response from an evaluation.
  type: object
  required:
    - generations
    - scores
  properties:
    generations:
      title: Generations
      type: array
      items:
        type: object
        additionalProperties: true
    scores:
      title: Scores
      type: object
      additionalProperties:
        $ref: '#/components/schemas/ScoringResult'
# Job handle: a required string `job_id` and a required JobStatus.
Job:
  title: Job
  description: A job execution instance with status tracking.
  type: object
  required:
    - job_id
    - status
  properties:
    job_id:
      title: Job Id
      type: string
    status:
      $ref: '#/components/schemas/JobStatus'
# Rerank request body. `query` and each entry of `items` may be a plain string
# or a text/image chat content part; `max_num_results` is optional and
# nullable.
RerankRequest:
  title: RerankRequest
  type: object
  required:
    - model
    - query
    - items
  properties:
    model:
      title: Model
      type: string
    query:
      title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam
      anyOf:
        - type: string
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          title: OpenAIChatCompletionContentPartTextParam
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
          title: OpenAIChatCompletionContentPartImageParam
    items:
      title: Items
      type: array
      items:
        title: string | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam
        anyOf:
          - type: string
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
            title: OpenAIChatCompletionContentPartTextParam
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
            title: OpenAIChatCompletionContentPartImageParam
    max_num_results:
      anyOf:
        - type: integer
        - type: 'null'
# One rerank entry: a required integer `index` and a required numeric
# `relevance_score`.
RerankData:
  title: RerankData
  description: A single rerank result from a reranking response.
  type: object
  required:
    - index
    - relevance_score
  properties:
    index:
      title: Index
      type: integer
    relevance_score:
      title: Relevance Score
      type: number
# Envelope whose required `data` array holds RerankData entries.
RerankResponse:
  title: RerankResponse
  description: Response from a reranking request.
  type: object
  required:
    - data
  properties:
    data:
      title: Data
      type: array
      items:
        $ref: '#/components/schemas/RerankData'
# Training checkpoint record. Every property except `training_metrics` is
# required; `training_metrics` may be a PostTrainingMetric or null.
Checkpoint:
  title: Checkpoint
  description: Checkpoint created during training runs.
  type: object
  required:
    - identifier
    - created_at
    - epoch
    - post_training_job_id
    - path
  properties:
    identifier:
      title: Identifier
      type: string
    created_at:
      title: Created At
      type: string
      format: date-time
    epoch:
      title: Epoch
      type: integer
    post_training_job_id:
      title: Post Training Job Id
      type: string
    path:
      title: Path
      type: string
    training_metrics:
      title: PostTrainingMetric
      anyOf:
        - $ref: '#/components/schemas/PostTrainingMetric'
          title: PostTrainingMetric
        - type: 'null'
# Artifacts envelope: a required `job_uuid` plus an optional array of
# Checkpoint objects.
PostTrainingJobArtifactsResponse:
  title: PostTrainingJobArtifactsResponse
  description: Artifacts of a finetuning job.
  type: object
  required:
    - job_uuid
  properties:
    job_uuid:
      title: Job Uuid
      type: string
    checkpoints:
      title: Checkpoints
      type: array
      items:
        $ref: '#/components/schemas/Checkpoint'
# Metric record with four required values: integer `epoch` and numeric
# `train_loss`, `validation_loss` and `perplexity`.
PostTrainingMetric:
  title: PostTrainingMetric
  description: Training metrics captured during post-training jobs.
  type: object
  required:
    - epoch
    - train_loss
    - validation_loss
    - perplexity
  properties:
    epoch:
      title: Epoch
      type: integer
    train_loss:
      title: Train Loss
      type: number
    validation_loss:
      title: Validation Loss
      type: number
    perplexity:
      title: Perplexity
      type: number
# Request body with a single required string, `job_uuid`.
CancelTrainingJobRequest:
  title: CancelTrainingJobRequest
  type: object
  required:
    - job_uuid
  properties:
    job_uuid:
      title: Job Uuid
      type: string
# Status envelope: `job_uuid` and `status` are required. The three timestamps
# and `resources_allocated` are optional and nullable; `checkpoints` is an
# optional array of Checkpoint objects.
PostTrainingJobStatusResponse:
  title: PostTrainingJobStatusResponse
  description: Status of a finetuning job.
  type: object
  required:
    - job_uuid
    - status
  properties:
    job_uuid:
      title: Job Uuid
      type: string
    status:
      $ref: '#/components/schemas/JobStatus'
    scheduled_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
    started_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
    completed_at:
      anyOf:
        - type: string
          format: date-time
        - type: 'null'
    resources_allocated:
      anyOf:
        - type: object
          additionalProperties: true
        - type: 'null'
    checkpoints:
      title: Checkpoints
      type: array
      items:
        $ref: '#/components/schemas/Checkpoint'
# Envelope whose required `data` array holds PostTrainingJob objects.
ListPostTrainingJobsResponse:
  title: ListPostTrainingJobsResponse
  type: object
  required:
    - data
  properties:
    data:
      title: Data
      type: array
      items:
        $ref: '#/components/schemas/PostTrainingJob'
# DPO settings: numeric `beta` is required; `loss_type` is a DPOLossType that
# defaults to `sigmoid`.
DPOAlignmentConfig:
  title: DPOAlignmentConfig
  description: Configuration for Direct Preference Optimization (DPO) alignment.
  type: object
  required:
    - beta
  properties:
    beta:
      title: Beta
      type: number
    loss_type:
      $ref: '#/components/schemas/DPOLossType'
      default: sigmoid
# String enum of the loss types accepted by DPOAlignmentConfig.loss_type.
DPOLossType:
  title: DPOLossType
  type: string
  enum:
    - sigmoid
    - hinge
    - ipo
    - kto_pair
# Data-loading settings. The dataset ID, batch size, shuffle flag and data
# format are required; the remaining properties are optional and nullable,
# with `packed` and `train_on_input` defaulting to false.
DataConfig:
  title: DataConfig
  description: Configuration for training data and data loading.
  type: object
  required:
    - dataset_id
    - batch_size
    - shuffle
    - data_format
  properties:
    dataset_id:
      title: Dataset Id
      type: string
    batch_size:
      title: Batch Size
      type: integer
    shuffle:
      title: Shuffle
      type: boolean
    data_format:
      $ref: '#/components/schemas/DatasetFormat'
    validation_dataset_id:
      anyOf:
        - type: string
        - type: 'null'
    packed:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
    train_on_input:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
# String enum with two allowed values, referenced by DataConfig.data_format.
DatasetFormat:
  type: string
  enum:
    - instruct
    - dialog
  title: DatasetFormat
  description: Format of the training dataset.
# Four optional, nullable boolean switches; each defaults to false and
# none is listed as required.
EfficiencyConfig:
  properties:
    enable_activation_checkpointing:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
    enable_activation_offloading:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
    memory_efficient_fsdp_wrap:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
    fsdp_cpu_offload:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
  type: object
  title: EfficiencyConfig
  description: Configuration for memory and compute efficiency optimizations.
# Optimizer settings; all four properties are required.
OptimizerConfig:
  properties:
    optimizer_type:
      $ref: '#/components/schemas/OptimizerType'
    lr:
      type: number
      title: Lr
    weight_decay:
      type: number
      title: Weight Decay
    num_warmup_steps:
      type: integer
      title: Num Warmup Steps
  type: object
  required:
    - optimizer_type
    - lr
    - weight_decay
    - num_warmup_steps
  title: OptimizerConfig
  description: Configuration parameters for the optimization algorithm.
# String enum referenced by OptimizerConfig.optimizer_type.
OptimizerType:
  type: string
  enum:
    - adam
    - adamw
    - sgd
  title: OptimizerType
  description: Available optimizer algorithms for training.
# Top-level training settings. Only n_epochs is required; the nested
# data/optimizer/efficiency configs are nullable references.
TrainingConfig:
  properties:
    n_epochs:
      type: integer
      title: N Epochs
    max_steps_per_epoch:
      type: integer
      title: Max Steps Per Epoch
      default: 1
    gradient_accumulation_steps:
      type: integer
      title: Gradient Accumulation Steps
      default: 1
    max_validation_steps:
      anyOf:
        - type: integer
        - type: 'null'
      default: 1
    data_config:
      anyOf:
        # Inner title labels the referenced alternative; the outer title
        # (after the anyOf list) labels the property itself.
        - $ref: '#/components/schemas/DataConfig'
          title: DataConfig
        - type: 'null'
      title: DataConfig
    optimizer_config:
      anyOf:
        - $ref: '#/components/schemas/OptimizerConfig'
          title: OptimizerConfig
        - type: 'null'
      title: OptimizerConfig
    efficiency_config:
      anyOf:
        - $ref: '#/components/schemas/EfficiencyConfig'
          title: EfficiencyConfig
        - type: 'null'
      title: EfficiencyConfig
    dtype:
      anyOf:
        - type: string
        - type: 'null'
      default: bf16
  type: object
  required:
    - n_epochs
  title: TrainingConfig
  description: Comprehensive configuration for the training process.
# Request body for preference optimization; all six properties are required.
PreferenceOptimizeRequest:
  properties:
    job_uuid:
      type: string
      title: Job Uuid
    finetuned_model:
      type: string
      title: Finetuned Model
    algorithm_config:
      $ref: '#/components/schemas/DPOAlignmentConfig'
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
    hyperparam_search_config:
      # Free-form object: any keys are accepted.
      additionalProperties: true
      type: object
      title: Hyperparam Search Config
    logger_config:
      # Free-form object: any keys are accepted.
      additionalProperties: true
      type: object
      title: Logger Config
  type: object
  required:
    - job_uuid
    - finetuned_model
    - algorithm_config
    - training_config
    - hyperparam_search_config
    - logger_config
  title: PreferenceOptimizeRequest
# Job handle: an object with a single required string property, job_uuid.
PostTrainingJob:
  properties:
    job_uuid:
      type: string
      title: Job Uuid
  type: object
  required:
    - job_uuid
  title: PostTrainingJob
# LoRA variant of the fine-tuning algorithm config. `type` is pinned to the
# constant `LoRA`, which SupervisedFineTuneRequest uses as its discriminator.
LoraFinetuningConfig:
  properties:
    type:
      type: string
      const: LoRA
      title: Type
      default: LoRA
    lora_attn_modules:
      items:
        type: string
      type: array
      title: Lora Attn Modules
    apply_lora_to_mlp:
      type: boolean
      title: Apply Lora To Mlp
    apply_lora_to_output:
      type: boolean
      title: Apply Lora To Output
    rank:
      type: integer
      title: Rank
    alpha:
      type: integer
      title: Alpha
    use_dora:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
    quantize_base:
      anyOf:
        - type: boolean
        - type: 'null'
      default: false
  type: object
  required:
    - lora_attn_modules
    - apply_lora_to_mlp
    - apply_lora_to_output
    - rank
    - alpha
  title: LoraFinetuningConfig
  description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
# QAT variant of the fine-tuning algorithm config. `type` is pinned to the
# constant `QAT`, which SupervisedFineTuneRequest uses as its discriminator.
QATFinetuningConfig:
  properties:
    type:
      type: string
      const: QAT
      title: Type
      default: QAT
    quantizer_name:
      type: string
      title: Quantizer Name
    group_size:
      type: integer
      title: Group Size
  type: object
  required:
    - quantizer_name
    - group_size
  title: QATFinetuningConfig
  description: Configuration for Quantization-Aware Training (QAT) fine-tuning.
# Request body for supervised fine-tuning. Four properties are required;
# model, checkpoint_dir and algorithm_config are nullable and optional.
SupervisedFineTuneRequest:
  properties:
    job_uuid:
      type: string
      title: Job Uuid
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
    hyperparam_search_config:
      # Free-form object: any keys are accepted.
      additionalProperties: true
      type: object
      title: Hyperparam Search Config
    logger_config:
      # Free-form object: any keys are accepted.
      additionalProperties: true
      type: object
      title: Logger Config
    model:
      anyOf:
        - type: string
        - type: 'null'
      description: Model descriptor for training if not in provider config
    checkpoint_dir:
      anyOf:
        - type: string
        - type: 'null'
    algorithm_config:
      anyOf:
        # Tagged union: the `type` property of the payload selects which
        # of the two config schemas applies.
        - oneOf:
            - $ref: '#/components/schemas/LoraFinetuningConfig'
              title: LoraFinetuningConfig
            - $ref: '#/components/schemas/QATFinetuningConfig'
              title: QATFinetuningConfig
          discriminator:
            propertyName: type
            mapping:
              LoRA: '#/components/schemas/LoraFinetuningConfig'
              QAT: '#/components/schemas/QATFinetuningConfig'
          title: LoraFinetuningConfig | QATFinetuningConfig
        - type: 'null'
      title: Algorithm Config
  type: object
  required:
    - job_uuid
    - training_config
    - hyperparam_search_config
    - logger_config
  title: SupervisedFineTuneRequest
# String enum of the three dataset purposes defined by this spec.
DatasetPurpose:
  type: string
  enum:
    - post-training/messages
    - eval/question-answer
    - eval/messages-answer
  title: DatasetPurpose
  description: Purpose of the dataset. Each purpose has a required input data schema.
# Image content item. `type` is pinned to the constant `image`; only the
# `image` payload (a _URLOrData object) is required.
ImageContentItem-Input:
  properties:
    type:
      type: string
      const: image
      title: Type
      default: image
    image:
      $ref: '#/components/schemas/_URLOrData'
  type: object
  required:
    - image
  title: ImageContentItem
  description: An image content item
# String enum referenced by PostTrainingJobStatusResponse.status.
JobStatus:
  type: string
  enum:
    - completed
    - in_progress
    - failed
    - scheduled
    - cancelled
  title: JobStatus
  description: Status of a job execution.
# Holder for either a URL reference or inline data. Both properties are
# nullable and neither is listed as required.
_URLOrData:
  properties:
    url:
      anyOf:
        - $ref: '#/components/schemas/URL'
          title: URL
        - type: 'null'
      title: URL
    data:
      anyOf:
        - type: string
        - type: 'null'
      # JSON Schema annotation: the string carries base64-encoded content.
      contentEncoding: base64
  type: object
  title: _URLOrData
  description: A URL or a base64 encoded string
# Reusable error responses. Each body references the shared Error schema;
# the 400/429/500 entries also carry an example payload, DefaultError does not.
responses:
  BadRequest400:
    description: The request was invalid or malformed
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 400
          title: Bad Request
          detail: The request was invalid or malformed
  TooManyRequests429:
    description: The client has sent too many requests in a given amount of time
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 429
          title: Too Many Requests
          detail: You have exceeded the rate limit. Please try again later.
  InternalServerError500:
    description: The server encountered an unexpected error
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 500
          title: Internal Server Error
          detail: An unexpected error occurred
  DefaultError:
    description: An error occurred
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
# Tag declarations. `x-displayName` is a vendor extension giving the label
# shown by documentation renderers.
# NOTE(review): the datasetio operations at the top of this file are tagged
# `Datasetio`, while this list declares `DatasetIO`. Tag names are compared
# as plain strings, so confirm which spelling is intended.
tags:
  - description: APIs for creating and interacting with agentic systems.
    name: Agents
    x-displayName: Agents
  # NOTE(review): for Batches the display name is a full sentence and the
  # description holds the detail text; verify this is intentional.
  - description: |-
      The API is designed to allow use of openai client libraries for seamless integration.
      This API provides the following extensions:
      - idempotent batch creation
      Note: This API is currently under active development and may undergo changes.
    name: Batches
    x-displayName: The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.
  - description: ''
    name: Benchmarks
  - description: Protocol for conversation management operations.
    name: Conversations
    x-displayName: Conversations
  - description: ''
    name: DatasetIO
  - description: ''
    name: Datasets
  - description: Llama Stack Evaluation API for running evaluations on model and agent candidates.
    name: Eval
    x-displayName: Evaluations
  - description: This API is used to upload documents that can be used with other Llama Stack APIs.
    name: Files
    x-displayName: Files
  - description: |-
      Llama Stack Inference API for generating completions, chat completions, and embeddings.
      This API provides the raw interface to the underlying models. Three kinds of models are supported:
      - LLM models: these models generate "raw" and "chat" (conversational) completions.
      - Embedding models: these models generate embeddings to be used for semantic search.
      - Rerank models: these models reorder the documents based on their relevance to a query.
    name: Inference
    x-displayName: Inference
  - description: APIs for inspecting the Llama Stack service, including health status, available API routes with methods and implementing providers.
    name: Inspect
    x-displayName: Inspect
  - description: ''
    name: Models
  - description: ''
    name: PostTraining (Coming Soon)
  - description: Protocol for prompt management operations.
    name: Prompts
    x-displayName: Prompts
  - description: Providers API for inspecting, listing, and modifying providers and their configurations.
    name: Providers
    x-displayName: Providers
  - description: OpenAI-compatible Moderations API.
    name: Safety
    x-displayName: Safety
  - description: ''
    name: Scoring
  - description: ''
    name: ScoringFunctions
  - description: ''
    name: Shields
  - description: ''
    name: ToolGroups
  - description: ''
    name: ToolRuntime
  - description: ''
    name: VectorIO
# Vendor extension: one group named "Operations" that lists 21 tag names.
x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
      - Datasets
      - Eval
      - Files
      - Inference
      - Inspect
      - Models
      - PostTraining (Coming Soon)
      - Prompts
      - Providers
      - Safety
      - Scoring
      - ScoringFunctions
      - Shields
      - ToolGroups
      - ToolRuntime
      - VectorIO
# Document-wide security requirement: the `Default` scheme with an empty
# scope list.
security:
  - Default: []