# llama-stack-mirror/docs/static/experimental-llama-stack-spec.yaml
#
# 4798 lines
# 144 KiB
# YAML

# OpenAPI document header: spec version, API metadata, and server list.
openapi: 3.1.0
# Metadata for the experimental API surface. Note that `version` is v1 even
# though every path visible in this part of the file is under /v1alpha or
# /v1beta.
info:
title: >-
Llama Stack Specification - Experimental APIs
version: v1
description: >-
This is the specification of the Llama Stack that provides
a set of endpoints and their corresponding interfaces that are
tailored to
best leverage Llama Models.
**🧪 EXPERIMENTAL**: Pre-release APIs (v1alpha, v1beta) that may change before
becoming stable.
# Server list for the API; the host name is presumably a placeholder
# (NOTE(review): confirm before pointing clients at it).
servers:
- url: http://any-hosted-llama-stack.com
paths:
# DatasetIO: append rows to a dataset.
/v1beta/datasetio/append-rows/{dataset_id}:
  post:
    tags:
      - DatasetIO
    summary: Append rows to a dataset.
    description: Append rows to a dataset.
    deprecated: false
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: The ID of the dataset to append the rows to.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AppendRowsRequest'
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# DatasetIO: read rows from a dataset with offset-based pagination.
/v1beta/datasetio/iterrows/{dataset_id}:
  get:
    tags:
      - DatasetIO
    summary: Get a paginated list of rows from a dataset.
    # Literal block (|-) with blank lines so the Markdown bullet lists keep
    # their line breaks; a folded scalar would join them into one sentence.
    description: |-
      Get a paginated list of rows from a dataset.

      Uses offset-based pagination where:

      - start_index: The starting index (0-based). If None, starts from beginning.
      - limit: Number of items to return. If None or -1, returns all items.

      The response includes:

      - data: List of items for the current page.
      - has_more: Whether there are more items available after this set.
    deprecated: false
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: The ID of the dataset to get the rows from.
        schema:
          type: string
      - name: start_index
        in: query
        required: false
        description: >-
          Index into dataset for the first row to get. Get all rows if None.
        schema:
          type: integer
      - name: limit
        in: query
        required: false
        description: The number of rows to get.
        schema:
          type: integer
    responses:
      '200':
        description: A PaginatedResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PaginatedResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Datasets collection: list all datasets (GET) or register a new one (POST).
/v1beta/datasets:
  get:
    tags:
      - Datasets
    summary: List all datasets.
    description: List all datasets.
    deprecated: false
    parameters: []
    responses:
      '200':
        description: A ListDatasetsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListDatasetsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  post:
    tags:
      - Datasets
    summary: Register a new dataset.
    description: Register a new dataset.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RegisterDatasetRequest'
    responses:
      '200':
        description: A Dataset.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Dataset'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Single dataset: fetch it (GET) or unregister it (DELETE) by ID.
/v1beta/datasets/{dataset_id}:
  get:
    tags:
      - Datasets
    summary: Get a dataset by its ID.
    description: Get a dataset by its ID.
    deprecated: false
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: The ID of the dataset to get.
        schema:
          type: string
    responses:
      '200':
        description: A Dataset.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Dataset'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  delete:
    tags:
      - Datasets
    summary: Unregister a dataset by its ID.
    description: Unregister a dataset by its ID.
    deprecated: false
    parameters:
      - name: dataset_id
        in: path
        required: true
        description: The ID of the dataset to unregister.
        schema:
          type: string
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Agents collection: paginated listing (GET) and agent creation (POST).
/v1alpha/agents:
  get:
    tags:
      - Agents
    summary: List all agents.
    description: List all agents.
    deprecated: false
    parameters:
      - name: start_index
        in: query
        required: false
        description: The index to start the pagination from.
        schema:
          type: integer
      - name: limit
        in: query
        required: false
        description: The number of agents to return.
        schema:
          type: integer
    responses:
      '200':
        description: A PaginatedResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PaginatedResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  post:
    tags:
      - Agents
    summary: Create an agent with the given configuration.
    description: Create an agent with the given configuration.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateAgentRequest'
    responses:
      '200':
        description: An AgentCreateResponse with the agent ID.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AgentCreateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Single agent: describe it (GET) or delete it with its sessions and turns
# (DELETE).
/v1alpha/agents/{agent_id}:
  get:
    tags:
      - Agents
    summary: Describe an agent by its ID.
    description: Describe an agent by its ID.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: ID of the agent.
        schema:
          type: string
    responses:
      '200':
        # Reworded from the garbled "An Agent of the agent."
        description: The Agent with the given ID.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Agent'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  delete:
    tags:
      - Agents
    summary: Delete an agent by its ID and its associated sessions and turns.
    description: Delete an agent by its ID and its associated sessions and turns.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to delete.
        schema:
          type: string
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Create a session under an agent.
/v1alpha/agents/{agent_id}/session:
  post:
    tags:
      - Agents
    summary: Create a new session for an agent.
    description: Create a new session for an agent.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to create the session for.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateAgentSessionRequest'
    responses:
      '200':
        description: An AgentSessionCreateResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AgentSessionCreateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Single agent session: retrieve it (GET, optionally filtered by turn IDs)
# or delete it together with its turns (DELETE).
/v1alpha/agents/{agent_id}/session/{session_id}:
  get:
    tags:
      - Agents
    summary: Retrieve an agent session by its ID.
    description: Retrieve an agent session by its ID.
    deprecated: false
    parameters:
      - name: session_id
        in: path
        required: true
        description: The ID of the session to get.
        schema:
          type: string
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to get the session for.
        schema:
          type: string
      - name: turn_ids
        in: query
        required: false
        description: (Optional) List of turn IDs to filter the session by.
        schema:
          type: array
          items:
            type: string
    responses:
      '200':
        description: A Session.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Session'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  delete:
    tags:
      - Agents
    summary: Delete an agent session by its ID and its associated turns.
    description: Delete an agent session by its ID and its associated turns.
    deprecated: false
    parameters:
      - name: session_id
        in: path
        required: true
        description: The ID of the session to delete.
        schema:
          type: string
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to delete the session for.
        schema:
          type: string
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Create a turn in an agent session; the 200 response is declared both as
# JSON (Turn) and as text/event-stream (AgentTurnResponseStreamChunk).
/v1alpha/agents/{agent_id}/session/{session_id}/turn:
  post:
    tags:
      - Agents
    summary: Create a new turn for an agent.
    description: Create a new turn for an agent.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to create the turn for.
        schema:
          type: string
      - name: session_id
        in: path
        required: true
        description: The ID of the session to create the turn for.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CreateAgentTurnRequest'
    responses:
      '200':
        description: >-
          If stream=False, returns a Turn object. If stream=True, returns an
          SSE event stream of AgentTurnResponseStreamChunk.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Turn'
          text/event-stream:
            schema:
              $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Retrieve a single turn of an agent session.
/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}:
  get:
    tags:
      - Agents
    summary: Retrieve an agent turn by its ID.
    description: Retrieve an agent turn by its ID.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to get the turn for.
        schema:
          type: string
      - name: session_id
        in: path
        required: true
        description: The ID of the session to get the turn for.
        schema:
          type: string
      - name: turn_id
        in: path
        required: true
        description: The ID of the turn to get.
        schema:
          type: string
    responses:
      '200':
        description: A Turn.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Turn'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Resume a turn by submitting client-side tool call outputs; like turn
# creation, the 200 response is declared as JSON (Turn) and as
# text/event-stream (AgentTurnResponseStreamChunk).
/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume:
  post:
    tags:
      - Agents
    summary: Resume an agent turn with executed tool call responses.
    # The blank line keeps the summary sentence and the explanation as two
    # paragraphs instead of folding them into one.
    description: >-
      Resume an agent turn with executed tool call responses.

      When a Turn has the status `awaiting_input` due to pending input from
      client side tool calls, this endpoint can be used to submit the outputs
      from the tool calls once they are ready.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to resume.
        schema:
          type: string
      - name: session_id
        in: path
        required: true
        description: The ID of the session to resume.
        schema:
          type: string
      - name: turn_id
        in: path
        required: true
        description: The ID of the turn to resume.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ResumeAgentTurnRequest'
    responses:
      '200':
        # Described in HTTP terms (SSE stream) rather than as a Python
        # AsyncIterator, matching the text/event-stream content type below.
        description: >-
          If stream=False, returns a Turn object. If stream=True, returns an
          SSE event stream of AgentTurnResponseStreamChunk.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Turn'
          text/event-stream:
            schema:
              $ref: '#/components/schemas/AgentTurnResponseStreamChunk'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Retrieve a single step of an agent turn.
/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}:
  get:
    tags:
      - Agents
    summary: Retrieve an agent step by its ID.
    description: Retrieve an agent step by its ID.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to get the step for.
        schema:
          type: string
      - name: session_id
        in: path
        required: true
        description: The ID of the session to get the step for.
        schema:
          type: string
      - name: turn_id
        in: path
        required: true
        description: The ID of the turn to get the step for.
        schema:
          type: string
      - name: step_id
        in: path
        required: true
        description: The ID of the step to get.
        schema:
          type: string
    responses:
      '200':
        description: An AgentStepResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AgentStepResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Paginated listing of the sessions that belong to one agent.
/v1alpha/agents/{agent_id}/sessions:
  get:
    tags:
      - Agents
    summary: List all session(s) of a given agent.
    description: List all session(s) of a given agent.
    deprecated: false
    parameters:
      - name: agent_id
        in: path
        required: true
        description: The ID of the agent to list sessions for.
        schema:
          type: string
      - name: start_index
        in: query
        required: false
        description: The index to start the pagination from.
        schema:
          type: integer
      - name: limit
        in: query
        required: false
        description: The number of sessions to return.
        schema:
          type: integer
    responses:
      '200':
        description: A PaginatedResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PaginatedResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Benchmarks collection: list all benchmarks (GET) or register one (POST).
/v1alpha/eval/benchmarks:
  get:
    tags:
      - Benchmarks
    summary: List all benchmarks.
    description: List all benchmarks.
    deprecated: false
    parameters: []
    responses:
      '200':
        description: A ListBenchmarksResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListBenchmarksResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  post:
    tags:
      - Benchmarks
    summary: Register a benchmark.
    description: Register a benchmark.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RegisterBenchmarkRequest'
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Single benchmark: fetch it (GET) or unregister it (DELETE) by ID.
/v1alpha/eval/benchmarks/{benchmark_id}:
  get:
    tags:
      - Benchmarks
    summary: Get a benchmark by its ID.
    description: Get a benchmark by its ID.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: The ID of the benchmark to get.
        schema:
          type: string
    responses:
      '200':
        description: A Benchmark.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Benchmark'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  delete:
    tags:
      - Benchmarks
    summary: Unregister a benchmark.
    description: Unregister a benchmark.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: The ID of the benchmark to unregister.
        schema:
          type: string
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Evaluate a list of rows against a benchmark and return the result directly.
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
  post:
    tags:
      - Eval
    summary: Evaluate a list of rows on a benchmark.
    description: Evaluate a list of rows on a benchmark.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: The ID of the benchmark to run the evaluation on.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/EvaluateRowsRequest'
    responses:
      '200':
        description: EvaluateResponse object containing generations and scores.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Start an evaluation run on a benchmark; the response is the created Job.
/v1alpha/eval/benchmarks/{benchmark_id}/jobs:
  post:
    tags:
      - Eval
    summary: Run an evaluation on a benchmark.
    description: Run an evaluation on a benchmark.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        description: The ID of the benchmark to run the evaluation on.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RunEvalRequest'
    responses:
      '200':
        description: The job that was created to run the evaluation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Job'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Single evaluation job: read its status (GET) or cancel it (DELETE).
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
  get:
    tags:
      - Eval
    summary: Get the status of a job.
    description: Get the status of a job.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        # Reworded: this operation reads a job and does not start an
        # evaluation, so the run-eval wording did not fit.
        description: The ID of the benchmark the job belongs to.
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: The ID of the job to get the status of.
        schema:
          type: string
    responses:
      '200':
        description: The status of the evaluation job.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Job'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
  delete:
    tags:
      - Eval
    summary: Cancel a job.
    description: Cancel a job.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        # Reworded for the same reason as on the GET operation above.
        description: The ID of the benchmark the job belongs to.
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: The ID of the job to cancel.
        schema:
          type: string
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Fetch the result of an evaluation job as an EvaluateResponse.
/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
  get:
    tags:
      - Eval
    summary: Get the result of a job.
    description: Get the result of a job.
    deprecated: false
    parameters:
      - name: benchmark_id
        in: path
        required: true
        # Reworded: this operation reads a job's result and does not start
        # an evaluation, so the run-eval wording did not fit.
        description: The ID of the benchmark the job belongs to.
        schema:
          type: string
      - name: job_id
        in: path
        required: true
        description: The ID of the job to get the result of.
        schema:
          type: string
    responses:
      '200':
        description: The result of the job.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluateResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Inference: rerank documents by relevance to a query.
/v1alpha/inference/rerank:
  post:
    tags:
      - Inference
    summary: Rerank a list of documents based on their relevance to a query.
    description: Rerank a list of documents based on their relevance to a query.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RerankRequest'
    responses:
      '200':
        description: >-
          RerankResponse with indices sorted by relevance score (descending).
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RerankResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: fetch the artifacts of a training job; the job is selected
# by the job_uuid query parameter.
/v1alpha/post-training/job/artifacts:
  get:
    tags:
      - PostTraining (Coming Soon)
    summary: Get the artifacts of a training job.
    description: Get the artifacts of a training job.
    deprecated: false
    parameters:
      - name: job_uuid
        in: query
        required: true
        description: The UUID of the job to get the artifacts of.
        schema:
          type: string
    responses:
      '200':
        description: A PostTrainingJobArtifactsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: cancel a training job identified in the request body.
/v1alpha/post-training/job/cancel:
  post:
    tags:
      - PostTraining (Coming Soon)
    summary: Cancel a training job.
    description: Cancel a training job.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/CancelTrainingJobRequest'
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: read the status of a training job; the job is selected by
# the job_uuid query parameter.
/v1alpha/post-training/job/status:
  get:
    tags:
      - PostTraining (Coming Soon)
    summary: Get the status of a training job.
    description: Get the status of a training job.
    deprecated: false
    parameters:
      - name: job_uuid
        in: query
        required: true
        description: The UUID of the job to get the status of.
        schema:
          type: string
    responses:
      '200':
        description: A PostTrainingJobStatusResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJobStatusResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: list all training jobs.
/v1alpha/post-training/jobs:
  get:
    tags:
      - PostTraining (Coming Soon)
    summary: Get all training jobs.
    description: Get all training jobs.
    deprecated: false
    parameters: []
    responses:
      '200':
        description: A ListPostTrainingJobsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ListPostTrainingJobsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: start a preference-optimization job.
/v1alpha/post-training/preference-optimize:
  post:
    tags:
      - PostTraining (Coming Soon)
    summary: Run preference optimization of a model.
    description: Run preference optimization of a model.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/PreferenceOptimizeRequest'
    responses:
      '200':
        description: A PostTrainingJob.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJob'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Post-training: start a supervised fine-tuning job.
/v1alpha/post-training/supervised-fine-tune:
  post:
    tags:
      - PostTraining (Coming Soon)
    summary: Run supervised fine-tuning of a model.
    description: Run supervised fine-tuning of a model.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SupervisedFineTuneRequest'
    responses:
      '200':
        description: A PostTrainingJob.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PostTrainingJob'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: query a named metric; query details travel in the POST body.
/v1alpha/telemetry/metrics/{metric_name}:
  post:
    tags:
      - Telemetry
    summary: Query metrics.
    description: Query metrics.
    deprecated: false
    parameters:
      - name: metric_name
        in: path
        required: true
        description: The name of the metric to query.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/QueryMetricsRequest'
    responses:
      '200':
        description: A QueryMetricsResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueryMetricsResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: query spans.
/v1alpha/telemetry/spans:
  post:
    tags:
      - Telemetry
    summary: Query spans.
    description: Query spans.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/QuerySpansRequest'
    responses:
      '200':
        description: A QuerySpansResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QuerySpansResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: save spans to a dataset.
/v1alpha/telemetry/spans/export:
  post:
    tags:
      - Telemetry
    summary: Save spans to a dataset.
    description: Save spans to a dataset.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SaveSpansToDatasetRequest'
    responses:
      '200':
        description: OK
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: fetch the span tree for a span; options travel in the POST body.
/v1alpha/telemetry/spans/{span_id}/tree:
  post:
    tags:
      - Telemetry
    summary: Get a span tree by its ID.
    description: Get a span tree by its ID.
    deprecated: false
    parameters:
      - name: span_id
        in: path
        required: true
        description: The ID of the span to get the tree from.
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/GetSpanTreeRequest'
    responses:
      '200':
        description: A QuerySpanTreeResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QuerySpanTreeResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: query traces.
/v1alpha/telemetry/traces:
  post:
    tags:
      - Telemetry
    summary: Query traces.
    description: Query traces.
    deprecated: false
    parameters: []
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/QueryTracesRequest'
    responses:
      '200':
        description: A QueryTracesResponse.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueryTracesResponse'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: fetch a single trace by ID.
/v1alpha/telemetry/traces/{trace_id}:
  get:
    tags:
      - Telemetry
    summary: Get a trace by its ID.
    description: Get a trace by its ID.
    deprecated: false
    parameters:
      - name: trace_id
        in: path
        required: true
        description: The ID of the trace to get.
        schema:
          type: string
    responses:
      '200':
        description: A Trace.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Trace'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Telemetry: fetch a single span of a trace.
/v1alpha/telemetry/traces/{trace_id}/spans/{span_id}:
  get:
    tags:
      - Telemetry
    summary: Get a span by its ID.
    description: Get a span by its ID.
    deprecated: false
    parameters:
      - name: trace_id
        in: path
        required: true
        description: The ID of the trace to get the span from.
        schema:
          type: string
      - name: span_id
        in: path
        required: true
        description: The ID of the span to get.
        schema:
          type: string
    responses:
      '200':
        description: A Span.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Span'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
# Default JSON Schema dialect for the schemas in this document.
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
components:
schemas:
# Error payload: status, title and detail are required; instance is optional.
Error:
  title: Error
  description: Error response from the API. Roughly follows RFC 7807.
  type: object
  required:
    - status
    - title
    - detail
  properties:
    status:
      type: integer
      description: HTTP status code
    title:
      type: string
      description: >-
        Error title, a short summary of the error which is invariant for an
        error type
    detail:
      type: string
      description: Error detail, a longer human-readable description of the error
    instance:
      type: string
      description: >-
        (Optional) A URL which can be used to retrieve more information about
        the specific occurrence of the error
  additionalProperties: false
# Request body for appending rows: each row is an object whose values may be
# any JSON type.
AppendRowsRequest:
  title: AppendRowsRequest
  type: object
  required:
    - rows
  properties:
    rows:
      description: The rows to append to the dataset.
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
  additionalProperties: false
# Generic page of results: `data` and `has_more` are required, `url` is not.
PaginatedResponse:
  title: PaginatedResponse
  description: A generic paginated response that follows a simple format.
  type: object
  required:
    - data
    - has_more
  properties:
    data:
      description: The list of items for the current page
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
    has_more:
      description: Whether there are more items available after this set
      type: boolean
    url:
      description: The URL for accessing this list
      type: string
  additionalProperties: false
# Dataset resource: identity fields, a fixed `type`, a purpose, a data
# source (URI- or rows-based, selected by the source's `type`), and metadata.
Dataset:
  title: Dataset
  description: >-
    Dataset resource for storing and accessing training or evaluation data.
  type: object
  required:
    - identifier
    - provider_id
    - type
    - purpose
    - source
    - metadata
  properties:
    identifier:
      type: string
    provider_resource_id:
      type: string
    provider_id:
      type: string
    type:
      description: Type of resource, always 'dataset' for datasets
      type: string
      # The enum lists every resource kind, but `const` below narrows the
      # accepted value to `dataset` only.
      enum:
        - model
        - shield
        - vector_db
        - dataset
        - scoring_function
        - benchmark
        - tool
        - tool_group
        - prompt
      const: dataset
      default: dataset
    purpose:
      description: Purpose of the dataset indicating its intended use
      type: string
      enum:
        - post-training/messages
        - eval/question-answer
        - eval/messages-answer
    source:
      description: Data source configuration for the dataset
      oneOf:
        - $ref: '#/components/schemas/URIDataSource'
        - $ref: '#/components/schemas/RowsDataSource'
      discriminator:
        propertyName: type
        mapping:
          uri: '#/components/schemas/URIDataSource'
          rows: '#/components/schemas/RowsDataSource'
    metadata:
      description: Additional metadata for the dataset
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
  additionalProperties: false
# RowsDataSource: inline data source; `type` is the constant "rows" and `rows`
# is a list of free-form objects.
RowsDataSource:
  title: RowsDataSource
  description: A dataset stored in rows.
  type: object
  properties:
    type:
      type: string
      const: rows
      default: rows
    rows:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: >-
        The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
        "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
        world!"}]} ]
  additionalProperties: false
  required: [type, rows]
# URIDataSource: data source referenced by URI; `type` is the constant "uri".
URIDataSource:
  title: URIDataSource
  description: A dataset that can be obtained from a URI.
  type: object
  properties:
    type:
      type: string
      const: uri
      default: uri
    uri:
      type: string
      description: >-
        The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
        - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
  additionalProperties: false
  required: [type, uri]
# ListDatasetsResponse: wrapper object whose `data` array holds Dataset entries.
ListDatasetsResponse:
  title: ListDatasetsResponse
  description: Response from listing datasets.
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/components/schemas/Dataset'
      description: List of datasets
  additionalProperties: false
  required: [data]
# DataSource: union of the URI and rows data sources, discriminated on `type`.
DataSource:
  oneOf:
    - $ref: '#/components/schemas/URIDataSource'
    - $ref: '#/components/schemas/RowsDataSource'
  discriminator:
    propertyName: type
    mapping:
      uri: '#/components/schemas/URIDataSource'
      rows: '#/components/schemas/RowsDataSource'
# RegisterDatasetRequest: request body for registering a dataset. purpose and
# source are required; metadata and dataset_id are optional.
RegisterDatasetRequest:
  title: RegisterDatasetRequest
  type: object
  properties:
    purpose:
      type: string
      enum:
        - post-training/messages
        - eval/question-answer
        - eval/messages-answer
      description: >-
        The purpose of the dataset. One of: - "post-training/messages": The dataset
        contains a messages column with list of messages for post-training. {
        "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
        "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
        contains a question column and an answer column for evaluation. { "question":
        "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
        The dataset contains a messages column with list of messages and an answer
        column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
        my name is John Doe."}, {"role": "assistant", "content": "Hello, John
        Doe. How can I help you today?"}, {"role": "user", "content": "What's
        my name?"}, ], "answer": "John Doe" }
    source:
      $ref: '#/components/schemas/DataSource'
      description: >-
        The data source of the dataset. Ensure that the data source schema is
        compatible with the purpose of the dataset. Examples: - { "type": "uri",
        "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
        "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
        } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
        } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
        "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
        } ] }
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        The metadata for the dataset. - E.g. {"description": "My dataset"}.
    dataset_id:
      type: string
      description: >-
        The ID of the dataset. If not provided, an ID will be generated.
  additionalProperties: false
  required: [purpose, source]
# AgentConfig: agent configuration. Only model and instructions are required.
# tool_choice and tool_prompt_format are flagged deprecated in this schema.
AgentConfig:
  title: AgentConfig
  description: Configuration for an agent.
  type: object
  properties:
    sampling_params:
      $ref: '#/components/schemas/SamplingParams'
    input_shields:
      type: array
      items:
        type: string
    output_shields:
      type: array
      items:
        type: string
    toolgroups:
      type: array
      items:
        $ref: '#/components/schemas/AgentTool'
    client_tools:
      type: array
      items:
        $ref: '#/components/schemas/ToolDef'
    tool_choice:
      type: string
      enum:
        - auto
        - required
        - none
      title: ToolChoice
      description: >-
        Whether tool use is required or automatic. This is a hint to the model
        which may not be followed. It depends on the Instruction Following capabilities
        of the model.
      deprecated: true
    tool_prompt_format:
      type: string
      enum:
        - json
        - function_tag
        - python_list
      title: ToolPromptFormat
      description: >-
        Prompt format for calling custom / zero shot tools.
      deprecated: true
    tool_config:
      $ref: '#/components/schemas/ToolConfig'
    max_infer_iters:
      type: integer
      default: 10
    model:
      type: string
      description: The model identifier to use for the agent
    instructions:
      type: string
      description: The system instructions for the agent
    name:
      type: string
      description: >-
        Optional name for the agent, used in telemetry and identification
    enable_session_persistence:
      type: boolean
      default: false
      description: >-
        Optional flag indicating whether session data has to be persisted
    response_format:
      $ref: '#/components/schemas/ResponseFormat'
      description: Optional response format configuration
  additionalProperties: false
  required: [model, instructions]
# AgentTool: either a plain string or an object with `name` and free-form `args`.
AgentTool:
  oneOf:
    - type: string
    - title: AgentToolGroupWithArgs
      type: object
      properties:
        name:
          type: string
        args:
          type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      additionalProperties: false
      required: [name, args]
# GrammarResponseFormat: response format whose `type` is pinned to "grammar";
# `bnf` is a free-form object.
GrammarResponseFormat:
  title: GrammarResponseFormat
  description: Configuration for grammar-guided response generation.
  type: object
  properties:
    type:
      type: string
      enum:
        - json_schema
        - grammar
      description: Must be "grammar" to identify this format type
      const: grammar
      default: grammar
    bnf:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        The BNF grammar specification the response should conform to
  additionalProperties: false
  required: [type, bnf]
# GreedySamplingStrategy: strategy variant identified by the constant type "greedy".
GreedySamplingStrategy:
  title: GreedySamplingStrategy
  description: >-
    Greedy sampling strategy that selects the highest probability token at each
    step.
  type: object
  properties:
    type:
      type: string
      const: greedy
      default: greedy
      description: Must be "greedy" to identify this sampling strategy
  additionalProperties: false
  required: [type]
# JsonSchemaResponseFormat: response format whose `type` is pinned to
# "json_schema"; `json_schema` is a free-form object.
JsonSchemaResponseFormat:
  title: JsonSchemaResponseFormat
  description: Configuration for JSON schema-guided response generation.
  type: object
  properties:
    type:
      type: string
      enum:
        - json_schema
        - grammar
      description: Must be "json_schema" to identify this format type
      const: json_schema
      default: json_schema
    json_schema:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        The JSON schema the response should conform to. In a Python SDK, this
        is often a `pydantic` model.
  additionalProperties: false
  required: [type, json_schema]
# ResponseFormat: union of the JSON-schema and grammar formats, discriminated on `type`.
ResponseFormat:
  oneOf:
    - $ref: '#/components/schemas/JsonSchemaResponseFormat'
    - $ref: '#/components/schemas/GrammarResponseFormat'
  discriminator:
    propertyName: type
    mapping:
      json_schema: '#/components/schemas/JsonSchemaResponseFormat'
      grammar: '#/components/schemas/GrammarResponseFormat'
# SamplingParams: sampling settings. `strategy` (greedy / top_p / top_k,
# discriminated on `type`) is the only required field.
SamplingParams:
  title: SamplingParams
  description: Sampling parameters.
  type: object
  properties:
    strategy:
      oneOf:
        - $ref: '#/components/schemas/GreedySamplingStrategy'
        - $ref: '#/components/schemas/TopPSamplingStrategy'
        - $ref: '#/components/schemas/TopKSamplingStrategy'
      discriminator:
        propertyName: type
        mapping:
          greedy: '#/components/schemas/GreedySamplingStrategy'
          top_p: '#/components/schemas/TopPSamplingStrategy'
          top_k: '#/components/schemas/TopKSamplingStrategy'
      description: The sampling strategy.
    max_tokens:
      type: integer
      default: 0
      description: >-
        The maximum number of tokens that can be generated in the completion.
        The token count of your prompt plus max_tokens cannot exceed the model's
        context length.
    repetition_penalty:
      type: number
      default: 1.0
      description: >-
        Number between -2.0 and 2.0. Positive values penalize new tokens based
        on whether they appear in the text so far, increasing the model's likelihood
        to talk about new topics.
    stop:
      type: array
      items:
        type: string
      description: >-
        Up to 4 sequences where the API will stop generating further tokens. The
        returned text will not contain the stop sequence.
  additionalProperties: false
  required: [strategy]
# ToolConfig: optional tool-use settings (no required fields).
ToolConfig:
  title: ToolConfig
  description: Configuration for tool use.
  type: object
  properties:
    tool_choice:
      # anyOf rather than oneOf: the ToolChoice enum values are themselves plain
      # strings, so "auto"/"required"/"none" (including the default) match BOTH
      # branches and would be rejected by oneOf, which demands exactly one match.
      anyOf:
        - type: string
          enum:
            - auto
            - required
            - none
          title: ToolChoice
          description: >-
            Whether tool use is required or automatic. This is a hint to the model
            which may not be followed. It depends on the Instruction Following
            capabilities of the model.
        - type: string
      default: auto
      description: >-
        (Optional) Whether tool use is automatic, required, or none. Can also
        specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
    tool_prompt_format:
      type: string
      enum:
        - json
        - function_tag
        - python_list
      description: >-
        (Optional) Instructs the model how to format tool calls. By default, Llama
        Stack will attempt to use a format that is best adapted to the model.
        - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
        - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name>
        tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
        syntax -- a list of function calls.
    system_message_behavior:
      type: string
      enum:
        - append
        - replace
      description: >-
        (Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
        Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
        Replaces the default system prompt with the provided system message. The
        system message can include the string '{{function_definitions}}' to indicate
        where the function definitions should be inserted.
      default: append
  additionalProperties: false
# ToolDef: tool definition; only `name` is required. The two schema fields and
# metadata are free-form objects.
ToolDef:
  title: ToolDef
  description: Tool definition used in runtime contexts.
  type: object
  properties:
    toolgroup_id:
      type: string
      description: (Optional) ID of the tool group this tool belongs to
    name:
      type: string
      description: Name of the tool
    description:
      type: string
      description: >-
        (Optional) Human-readable description of what the tool does
    input_schema:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        (Optional) JSON Schema for tool inputs (MCP inputSchema)
    output_schema:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        (Optional) JSON Schema for tool outputs (MCP outputSchema)
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: (Optional) Additional metadata about the tool
  additionalProperties: false
  required: [name]
# TopKSamplingStrategy: strategy variant identified by the constant type
# "top_k"; both `type` and `top_k` are required.
TopKSamplingStrategy:
  title: TopKSamplingStrategy
  description: >-
    Top-k sampling strategy that restricts sampling to the k most likely tokens.
  type: object
  properties:
    type:
      type: string
      const: top_k
      default: top_k
      description: Must be "top_k" to identify this sampling strategy
    top_k:
      type: integer
      # Enforce the lower bound that the description already states.
      minimum: 1
      description: >-
        Number of top tokens to consider for sampling. Must be at least 1
  additionalProperties: false
  required: [type, top_k]
# TopPSamplingStrategy: strategy variant identified by the constant type
# "top_p"; temperature and top_p are optional, top_p defaulting to 0.95.
TopPSamplingStrategy:
  title: TopPSamplingStrategy
  description: >-
    Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
    with cumulative probability >= p.
  type: object
  properties:
    type:
      type: string
      const: top_p
      default: top_p
      description: Must be "top_p" to identify this sampling strategy
    temperature:
      type: number
      description: >-
        Controls randomness in sampling. Higher values increase randomness
    top_p:
      type: number
      default: 0.95
      description: >-
        Cumulative probability threshold for nucleus sampling. Defaults to 0.95
  additionalProperties: false
  required: [type]
# CreateAgentRequest: request body wrapping the required AgentConfig.
CreateAgentRequest:
  title: CreateAgentRequest
  type: object
  properties:
    agent_config:
      $ref: '#/components/schemas/AgentConfig'
      description: The configuration for the agent.
  additionalProperties: false
  required: [agent_config]
# AgentCreateResponse: carries the identifier of the newly created agent.
AgentCreateResponse:
  title: AgentCreateResponse
  description: Response returned when creating a new agent.
  type: object
  properties:
    agent_id:
      type: string
      description: Unique identifier for the created agent
  additionalProperties: false
  required: [agent_id]
# Agent: agent record; id, config and creation timestamp are all required.
Agent:
  title: Agent
  description: An agent instance with configuration and metadata.
  type: object
  properties:
    agent_id:
      type: string
      description: Unique identifier for the agent
    agent_config:
      $ref: '#/components/schemas/AgentConfig'
      description: Configuration settings for the agent
    created_at:
      type: string
      format: date-time
      description: Timestamp when the agent was created
  additionalProperties: false
  required: [agent_id, agent_config, created_at]
# CreateAgentSessionRequest: request body with the required session name.
CreateAgentSessionRequest:
  title: CreateAgentSessionRequest
  type: object
  properties:
    session_name:
      type: string
      description: The name of the session to create.
  additionalProperties: false
  required: [session_name]
# AgentSessionCreateResponse: carries the identifier of the newly created session.
AgentSessionCreateResponse:
  title: AgentSessionCreateResponse
  description: Response returned when creating a new agent session.
  type: object
  properties:
    session_id:
      type: string
      description: Unique identifier for the created session
  additionalProperties: false
  required: [session_id]
# CompletionMessage: assistant message. role, content and stop_reason are
# required; tool_calls is optional.
CompletionMessage:
  title: CompletionMessage
  description: >-
    A message containing the model's (assistant) response in a chat conversation.
  type: object
  properties:
    role:
      type: string
      const: assistant
      default: assistant
      description: >-
        Must be "assistant" to identify this as the model's response
    content:
      $ref: '#/components/schemas/InterleavedContent'
      description: The content of the model's response
    stop_reason:
      type: string
      enum:
        - end_of_turn
        - end_of_message
        - out_of_tokens
      description: >-
        Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
        The model finished generating the entire response. - `StopReason.end_of_message`:
        The model finished generating but generated a partial response -- usually,
        a tool call. The user may call the tool and continue the conversation
        with the tool's response. - `StopReason.out_of_tokens`: The model ran
        out of token budget.
    tool_calls:
      type: array
      items:
        $ref: '#/components/schemas/ToolCall'
      description: >-
        List of tool calls. Each tool call is a ToolCall object.
  additionalProperties: false
  required: [role, content, stop_reason]
# ImageContentItem: content item with constant type "image"; the nested `image`
# object may carry a `url` reference and/or base64 `data` (neither is required
# by this schema).
ImageContentItem:
  title: ImageContentItem
  description: An image content item
  type: object
  properties:
    type:
      type: string
      const: image
      default: image
      description: >-
        Discriminator type of the content item. Always "image"
    image:
      type: object
      properties:
        url:
          $ref: '#/components/schemas/URL'
          description: >-
            A URL of the image or data URL in the format of data:image/{type};base64,{data}.
            Note that URL could have length limits.
        data:
          type: string
          contentEncoding: base64
          description: base64 encoded image data as string
      additionalProperties: false
      description: >-
        Image as a base64 encoded string or a URL
  additionalProperties: false
  required: [type, image]
# InferenceStep: step variant whose step_type is pinned to "inference"; carries
# the model's CompletionMessage. Timestamps are optional.
InferenceStep:
  title: InferenceStep
  description: An inference step in an agent turn.
  type: object
  properties:
    turn_id:
      type: string
      description: The ID of the turn.
    step_id:
      type: string
      description: The ID of the step.
    started_at:
      type: string
      format: date-time
      description: The time the step started.
    completed_at:
      type: string
      format: date-time
      description: The time the step completed.
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      title: StepType
      description: Type of the step in an agent turn.
      const: inference
      default: inference
    model_response:
      $ref: '#/components/schemas/CompletionMessage'
      description: The response from the LLM.
  additionalProperties: false
  required: [turn_id, step_id, step_type, model_response]
# InterleavedContent: a plain string, a single content item, or a list of content items.
InterleavedContent:
  oneOf:
    - type: string
    - $ref: '#/components/schemas/InterleavedContentItem'
    - type: array
      items:
        $ref: '#/components/schemas/InterleavedContentItem'
# InterleavedContentItem: union of image and text content items, discriminated on `type`.
InterleavedContentItem:
  oneOf:
    - $ref: '#/components/schemas/ImageContentItem'
    - $ref: '#/components/schemas/TextContentItem'
  discriminator:
    propertyName: type
    mapping:
      image: '#/components/schemas/ImageContentItem'
      text: '#/components/schemas/TextContentItem'
# MemoryRetrievalStep: step variant whose step_type is pinned to
# "memory_retrieval"; carries the retrieved context.
MemoryRetrievalStep:
  title: MemoryRetrievalStep
  description: A memory retrieval step in an agent turn.
  type: object
  properties:
    turn_id:
      type: string
      description: The ID of the turn.
    step_id:
      type: string
      description: The ID of the step.
    started_at:
      type: string
      format: date-time
      description: The time the step started.
    completed_at:
      type: string
      format: date-time
      description: The time the step completed.
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      title: StepType
      description: Type of the step in an agent turn.
      const: memory_retrieval
      default: memory_retrieval
    # NOTE(review): typed as a single string although the description speaks of
    # several IDs; confirm the intended shape with the API owners.
    vector_db_ids:
      type: string
      description: >-
        The IDs of the vector databases to retrieve context from.
    inserted_context:
      $ref: '#/components/schemas/InterleavedContent'
      description: The context retrieved from the vector databases.
  additionalProperties: false
  required:
    - turn_id
    - step_id
    - step_type
    - vector_db_ids
    - inserted_context
# SafetyViolation: violation record. violation_level and metadata are required;
# user_message is optional.
SafetyViolation:
  title: SafetyViolation
  description: >-
    Details of a safety violation detected by content moderation.
  type: object
  properties:
    violation_level:
      $ref: '#/components/schemas/ViolationLevel'
      description: Severity level of the violation
    user_message:
      type: string
      description: >-
        (Optional) Message to convey to the user about the violation
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        Additional metadata including specific violation codes for debugging and
        telemetry
  additionalProperties: false
  required: [violation_level, metadata]
# Session: session record with its list of turns; every field is required.
Session:
  title: Session
  description: A single session of an interaction with an Agentic System.
  type: object
  properties:
    session_id:
      type: string
      description: Unique identifier for the conversation session
    session_name:
      type: string
      description: Human-readable name for the session
    turns:
      type: array
      items:
        $ref: '#/components/schemas/Turn'
      description: List of all turns that have occurred in this session
    started_at:
      type: string
      format: date-time
      description: Timestamp when the session was created
  additionalProperties: false
  required: [session_id, session_name, turns, started_at]
# ShieldCallStep: step variant whose step_type is pinned to "shield_call";
# `violation` is optional.
ShieldCallStep:
  title: ShieldCallStep
  description: A shield call step in an agent turn.
  type: object
  properties:
    turn_id:
      type: string
      description: The ID of the turn.
    step_id:
      type: string
      description: The ID of the step.
    started_at:
      type: string
      format: date-time
      description: The time the step started.
    completed_at:
      type: string
      format: date-time
      description: The time the step completed.
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      title: StepType
      description: Type of the step in an agent turn.
      const: shield_call
      default: shield_call
    violation:
      $ref: '#/components/schemas/SafetyViolation'
      description: The violation from the shield call.
  additionalProperties: false
  required: [turn_id, step_id, step_type]
# TextContentItem: content item with constant type "text" and a required text string.
TextContentItem:
  title: TextContentItem
  description: A text content item
  type: object
  properties:
    type:
      type: string
      const: text
      default: text
      description: >-
        Discriminator type of the content item. Always "text"
    text:
      type: string
      description: Text content
  additionalProperties: false
  required: [type, text]
# ToolCall: a tool invocation; call_id, tool_name and arguments (a string) are
# all required.
ToolCall:
  title: ToolCall
  type: object
  properties:
    call_id:
      type: string
    tool_name:
      # anyOf rather than oneOf: every BuiltinTool value is also a plain string,
      # so a built-in name matches BOTH branches and would be rejected by oneOf,
      # which demands exactly one match.
      anyOf:
        - type: string
          enum:
            - brave_search
            - wolfram_alpha
            - photogen
            - code_interpreter
          title: BuiltinTool
        - type: string
    arguments:
      type: string
  additionalProperties: false
  required: [call_id, tool_name, arguments]
# ToolExecutionStep: step variant whose step_type is pinned to "tool_execution";
# carries the tool calls and their responses.
ToolExecutionStep:
  title: ToolExecutionStep
  description: A tool execution step in an agent turn.
  type: object
  properties:
    turn_id:
      type: string
      description: The ID of the turn.
    step_id:
      type: string
      description: The ID of the step.
    started_at:
      type: string
      format: date-time
      description: The time the step started.
    completed_at:
      type: string
      format: date-time
      description: The time the step completed.
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      title: StepType
      description: Type of the step in an agent turn.
      const: tool_execution
      default: tool_execution
    tool_calls:
      type: array
      items:
        $ref: '#/components/schemas/ToolCall'
      description: The tool calls to execute.
    tool_responses:
      type: array
      items:
        $ref: '#/components/schemas/ToolResponse'
      description: The tool responses from the tool calls.
  additionalProperties: false
  required:
    - turn_id
    - step_id
    - step_type
    - tool_calls
    - tool_responses
# ToolResponse: result of a tool invocation. call_id, tool_name and content are
# required; metadata is an optional free-form object.
ToolResponse:
  title: ToolResponse
  description: Response from a tool invocation.
  type: object
  properties:
    call_id:
      type: string
      description: >-
        Unique identifier for the tool call this response is for
    tool_name:
      # anyOf rather than oneOf: every BuiltinTool value is also a plain string,
      # so a built-in name matches BOTH branches and would be rejected by oneOf,
      # which demands exactly one match.
      anyOf:
        - type: string
          enum:
            - brave_search
            - wolfram_alpha
            - photogen
            - code_interpreter
          title: BuiltinTool
        - type: string
      description: Name of the tool that was invoked
    content:
      $ref: '#/components/schemas/InterleavedContent'
      description: The response content from the tool
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        (Optional) Additional metadata about the tool response
  additionalProperties: false
  required: [call_id, tool_name, content]
# ToolResponseMessage: message with constant role "tool", tied to a call_id;
# every field is required.
ToolResponseMessage:
  title: ToolResponseMessage
  description: A message representing the result of a tool invocation.
  type: object
  properties:
    role:
      type: string
      const: tool
      default: tool
      description: Must be "tool" to identify this as a tool response
    call_id:
      type: string
      description: >-
        Unique identifier for the tool call this response is for
    content:
      $ref: '#/components/schemas/InterleavedContent'
      description: The response content from the tool
  additionalProperties: false
  required: [role, call_id, content]
# Turn: one turn of a session: input messages, the steps executed (union
# discriminated on step_type) and the output message. output_attachments and
# completed_at are optional.
Turn:
  title: Turn
  description: A single turn in an interaction with an Agentic System.
  type: object
  properties:
    turn_id:
      type: string
      description: Unique identifier for the turn within a session
    session_id:
      type: string
      description: Unique identifier for the conversation session
    input_messages:
      type: array
      items:
        oneOf:
          - $ref: '#/components/schemas/UserMessage'
          - $ref: '#/components/schemas/ToolResponseMessage'
      description: List of messages that initiated this turn
    steps:
      type: array
      items:
        oneOf:
          - $ref: '#/components/schemas/InferenceStep'
          - $ref: '#/components/schemas/ToolExecutionStep'
          - $ref: '#/components/schemas/ShieldCallStep'
          - $ref: '#/components/schemas/MemoryRetrievalStep'
        discriminator:
          propertyName: step_type
          mapping:
            inference: '#/components/schemas/InferenceStep'
            tool_execution: '#/components/schemas/ToolExecutionStep'
            shield_call: '#/components/schemas/ShieldCallStep'
            memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
      description: >-
        Ordered list of processing steps executed during this turn
    output_message:
      $ref: '#/components/schemas/CompletionMessage'
      description: >-
        The model's generated response containing content and metadata
    output_attachments:
      type: array
      items:
        title: Attachment
        description: An attachment to an agent turn.
        type: object
        properties:
          content:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/InterleavedContentItem'
              - type: array
                items:
                  $ref: '#/components/schemas/InterleavedContentItem'
              - $ref: '#/components/schemas/URL'
            description: The content of the attachment.
          mime_type:
            type: string
            description: The MIME type of the attachment.
        additionalProperties: false
        required: [content, mime_type]
      description: >-
        (Optional) Files or media attached to the agent's response
    started_at:
      type: string
      format: date-time
      description: Timestamp when the turn began
    completed_at:
      type: string
      format: date-time
      description: >-
        (Optional) Timestamp when the turn finished, if completed
  additionalProperties: false
  required:
    - turn_id
    - session_id
    - input_messages
    - steps
    - output_message
    - started_at
# URL: object wrapper around a single required `uri` string.
URL:
  title: URL
  description: A URL reference to external content.
  type: object
  properties:
    uri:
      type: string
      description: The URL string pointing to the resource
  additionalProperties: false
  required: [uri]
# UserMessage: message with constant role "user"; role and content are required,
# context is optional.
UserMessage:
  title: UserMessage
  description: A message from the user in a chat conversation.
  type: object
  properties:
    role:
      type: string
      const: user
      default: user
      description: Must be "user" to identify this as a user message
    content:
      $ref: '#/components/schemas/InterleavedContent'
      description: >-
        The content of the message, which can include text and other media
    context:
      $ref: '#/components/schemas/InterleavedContent'
      description: >-
        (Optional) This field is used internally by Llama Stack to pass RAG context.
        This field may be removed in the API in the future.
  additionalProperties: false
  required: [role, content]
# ViolationLevel: string enum with the values info, warn and error.
ViolationLevel:
  title: ViolationLevel
  description: Severity level of a safety violation.
  type: string
  enum:
    - info
    - warn
    - error
# CreateAgentTurnRequest: request body for starting a turn. Only `messages` is
# required; stream, documents, toolgroups and tool_config are optional.
CreateAgentTurnRequest:
  title: CreateAgentTurnRequest
  type: object
  properties:
    messages:
      type: array
      items:
        oneOf:
          - $ref: '#/components/schemas/UserMessage'
          - $ref: '#/components/schemas/ToolResponseMessage'
      description: List of messages to start the turn with.
    stream:
      type: boolean
      description: >-
        (Optional) If True, generate an SSE event stream of the response. Defaults
        to False.
    documents:
      type: array
      items:
        title: Document
        description: A document to be used by an agent.
        type: object
        properties:
          content:
            oneOf:
              - type: string
              - $ref: '#/components/schemas/InterleavedContentItem'
              - type: array
                items:
                  $ref: '#/components/schemas/InterleavedContentItem'
              - $ref: '#/components/schemas/URL'
            description: The content of the document.
          mime_type:
            type: string
            description: The MIME type of the document.
        additionalProperties: false
        required: [content, mime_type]
      description: >-
        (Optional) List of documents to create the turn with.
    toolgroups:
      type: array
      items:
        $ref: '#/components/schemas/AgentTool'
      description: >-
        (Optional) List of toolgroups to create the turn with, will be used in
        addition to the agent's config toolgroups for the request.
    tool_config:
      $ref: '#/components/schemas/ToolConfig'
      description: >-
        (Optional) The tool configuration to create the turn with, will be used
        to override the agent's tool_config.
  additionalProperties: false
  required: [messages]
# AgentTurnResponseEvent: wrapper whose required `payload` is a union of the six
# event payloads, discriminated on `event_type`.
AgentTurnResponseEvent:
  title: AgentTurnResponseEvent
  description: An event in an agent turn response stream.
  type: object
  properties:
    payload:
      oneOf:
        - $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
        - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
        - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
      discriminator:
        propertyName: event_type
        mapping:
          step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
          step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
          step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
          turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
          turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
          turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
      description: Event-specific payload containing event data
  additionalProperties: false
  required: [payload]
# AgentTurnResponseStepCompletePayload: payload whose event_type is pinned to
# "step_complete"; step_details is a step union discriminated on step_type.
AgentTurnResponseStepCompletePayload:
  title: AgentTurnResponseStepCompletePayload
  description: >-
    Payload for step completion events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: step_complete
      default: step_complete
      description: Type of event being reported
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      description: Type of step being executed
    step_id:
      type: string
      description: Unique identifier for the step within a turn
    step_details:
      oneOf:
        - $ref: '#/components/schemas/InferenceStep'
        - $ref: '#/components/schemas/ToolExecutionStep'
        - $ref: '#/components/schemas/ShieldCallStep'
        - $ref: '#/components/schemas/MemoryRetrievalStep'
      discriminator:
        propertyName: step_type
        mapping:
          inference: '#/components/schemas/InferenceStep'
          tool_execution: '#/components/schemas/ToolExecutionStep'
          shield_call: '#/components/schemas/ShieldCallStep'
          memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
      description: Complete details of the executed step
  additionalProperties: false
  required: [event_type, step_type, step_id, step_details]
# AgentTurnResponseStepProgressPayload: payload whose event_type is pinned to
# "step_progress"; delta is a text/image/tool_call union discriminated on `type`.
AgentTurnResponseStepProgressPayload:
  title: AgentTurnResponseStepProgressPayload
  description: >-
    Payload for step progress events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: step_progress
      default: step_progress
      description: Type of event being reported
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      description: Type of step being executed
    step_id:
      type: string
      description: Unique identifier for the step within a turn
    delta:
      oneOf:
        - $ref: '#/components/schemas/TextDelta'
        - $ref: '#/components/schemas/ImageDelta'
        - $ref: '#/components/schemas/ToolCallDelta'
      discriminator:
        propertyName: type
        mapping:
          text: '#/components/schemas/TextDelta'
          image: '#/components/schemas/ImageDelta'
          tool_call: '#/components/schemas/ToolCallDelta'
      description: >-
        Incremental content changes during step execution
  additionalProperties: false
  required: [event_type, step_type, step_id, delta]
# AgentTurnResponseStepStartPayload: payload whose event_type is pinned to
# "step_start"; metadata is an optional free-form object.
AgentTurnResponseStepStartPayload:
  title: AgentTurnResponseStepStartPayload
  description: >-
    Payload for step start events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: step_start
      default: step_start
      description: Type of event being reported
    step_type:
      type: string
      enum:
        - inference
        - tool_execution
        - shield_call
        - memory_retrieval
      description: Type of step being executed
    step_id:
      type: string
      description: Unique identifier for the step within a turn
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: (Optional) Additional metadata for the step
  additionalProperties: false
  required: [event_type, step_type, step_id]
# AgentTurnResponseStreamChunk: stream chunk wrapping one required AgentTurnResponseEvent.
AgentTurnResponseStreamChunk:
  title: AgentTurnResponseStreamChunk
  description: Streamed agent turn completion response.
  type: object
  properties:
    event:
      $ref: '#/components/schemas/AgentTurnResponseEvent'
      description: >-
        Individual event in the agent turn response stream
  additionalProperties: false
  required: [event]
# AgentTurnResponseTurnAwaitingInputPayload: payload whose event_type is pinned
# to "turn_awaiting_input"; carries the Turn.
AgentTurnResponseTurnAwaitingInputPayload:
  title: AgentTurnResponseTurnAwaitingInputPayload
  description: >-
    Payload for turn awaiting input events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: turn_awaiting_input
      default: turn_awaiting_input
      description: Type of event being reported
    turn:
      $ref: '#/components/schemas/Turn'
      description: >-
        Turn data when waiting for external tool responses
  additionalProperties: false
  required: [event_type, turn]
# Event payload carrying the finished Turn. event_type is pinned to
# turn_complete by `const`.
AgentTurnResponseTurnCompletePayload:
  title: AgentTurnResponseTurnCompletePayload
  description: Payload for turn completion events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: turn_complete
      default: turn_complete
      description: Type of event being reported
    turn:
      $ref: '#/components/schemas/Turn'
      description: Complete turn data including all steps and results
  required: [event_type, turn]
  additionalProperties: false
# Event payload announcing a new turn; it carries only the turn_id.
# event_type is pinned to turn_start by `const`.
AgentTurnResponseTurnStartPayload:
  title: AgentTurnResponseTurnStartPayload
  description: Payload for turn start events in agent turn responses.
  type: object
  properties:
    event_type:
      type: string
      enum:
        - step_start
        - step_complete
        - step_progress
        - turn_start
        - turn_complete
        - turn_awaiting_input
      const: turn_start
      default: turn_start
      description: Type of event being reported
    turn_id:
      type: string
      description: Unique identifier for the turn within a session
  required: [event_type, turn_id]
  additionalProperties: false
# Delta variant tagged type=image; the payload is a base64-encoded string.
ImageDelta:
  title: ImageDelta
  description: An image content delta for streaming responses.
  type: object
  properties:
    type:
      type: string
      const: image
      default: image
      description: Discriminator type of the delta. Always "image"
    image:
      type: string
      contentEncoding: base64
      description: The incremental image data as bytes
  required: [type, image]
  additionalProperties: false
# Delta variant tagged type=text.
TextDelta:
  title: TextDelta
  description: A text content delta for streaming responses.
  type: object
  properties:
    type:
      type: string
      const: text
      default: text
      description: Discriminator type of the delta. Always "text"
    text:
      type: string
      description: The incremental text content
  required: [type, text]
  additionalProperties: false
# Delta variant tagged type=tool_call. tool_call is either a raw string or a
# ToolCall object; parse_status reports the parsing state.
ToolCallDelta:
  title: ToolCallDelta
  description: A tool call content delta for streaming responses.
  type: object
  properties:
    type:
      type: string
      const: tool_call
      default: tool_call
      description: Discriminator type of the delta. Always "tool_call"
    tool_call:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/ToolCall'
      description: >-
        Either an in-progress tool call string or the final parsed tool call
    parse_status:
      type: string
      enum: [started, in_progress, failed, succeeded]
      description: Current parsing status of the tool call
  required: [type, tool_call, parse_status]
  additionalProperties: false
# Request body for resuming a turn: a required list of ToolResponse objects
# and an optional `stream` flag.
ResumeAgentTurnRequest:
  title: ResumeAgentTurnRequest
  type: object
  properties:
    tool_responses:
      type: array
      items:
        $ref: '#/components/schemas/ToolResponse'
      description: The tool call responses to resume the turn with.
    stream:
      type: boolean
      description: Whether to stream the response.
  required: [tool_responses]
  additionalProperties: false
# Envelope around one agent step. `step` is a tagged union whose variant is
# selected by the step_type discriminator.
AgentStepResponse:
  title: AgentStepResponse
  description: Response containing details of a specific agent step.
  type: object
  properties:
    step:
      oneOf:
        - $ref: '#/components/schemas/InferenceStep'
        - $ref: '#/components/schemas/ToolExecutionStep'
        - $ref: '#/components/schemas/ShieldCallStep'
        - $ref: '#/components/schemas/MemoryRetrievalStep'
      discriminator:
        propertyName: step_type
        mapping:
          inference: '#/components/schemas/InferenceStep'
          tool_execution: '#/components/schemas/ToolExecutionStep'
          shield_call: '#/components/schemas/ShieldCallStep'
          memory_retrieval: '#/components/schemas/MemoryRetrievalStep'
      description: The complete step data and execution details
  required: [step]
  additionalProperties: false
# Benchmark resource. `type` lists every resource kind in its enum but is
# pinned to benchmark by `const`.
# NOTE(review): identifier, provider_resource_id and provider_id carry no
# description in the spec.
Benchmark:
  title: Benchmark
  description: A benchmark resource for evaluating model performance.
  type: object
  properties:
    identifier:
      type: string
    provider_resource_id:
      type: string
    provider_id:
      type: string
    type:
      type: string
      enum:
        - model
        - shield
        - vector_db
        - dataset
        - scoring_function
        - benchmark
        - tool
        - tool_group
        - prompt
      const: benchmark
      default: benchmark
      description: The resource type, always benchmark
    dataset_id:
      type: string
      description: Identifier of the dataset to use for the benchmark evaluation
    scoring_functions:
      type: array
      items:
        type: string
      description: >-
        List of scoring function identifiers to apply during evaluation
    # Free-form map: each value may be any JSON type.
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: Metadata for this evaluation task
  required:
    - identifier
    - provider_id
    - type
    - dataset_id
    - scoring_functions
    - metadata
  additionalProperties: false
# List wrapper: `data` is an array of Benchmark objects.
ListBenchmarksResponse:
  title: ListBenchmarksResponse
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/components/schemas/Benchmark'
  required: [data]
  additionalProperties: false
# Request body for registering a benchmark. Only benchmark_id, dataset_id and
# scoring_functions are required.
RegisterBenchmarkRequest:
  title: RegisterBenchmarkRequest
  type: object
  properties:
    benchmark_id:
      type: string
      description: The ID of the benchmark to register.
    dataset_id:
      type: string
      description: The ID of the dataset to use for the benchmark.
    scoring_functions:
      type: array
      items:
        type: string
      description: The scoring functions to use for the benchmark.
    provider_benchmark_id:
      type: string
      description: The ID of the provider benchmark to use for the benchmark.
    provider_id:
      type: string
      description: The ID of the provider to use for the benchmark.
    # Free-form map: each value may be any JSON type.
    metadata:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: The metadata to use for the benchmark.
  required: [benchmark_id, dataset_id, scoring_functions]
  additionalProperties: false
# Evaluation candidate tagged type=agent; wraps an AgentConfig.
AgentCandidate:
  title: AgentCandidate
  description: An agent candidate for evaluation.
  type: object
  properties:
    type:
      type: string
      const: agent
      default: agent
    config:
      $ref: '#/components/schemas/AgentConfig'
      description: The configuration for the agent candidate.
  required: [type, config]
  additionalProperties: false
# String enum referenced by the aggregation_functions arrays of the scoring
# parameter schemas.
AggregationFunctionType:
  title: AggregationFunctionType
  description: Types of aggregation functions for scoring results.
  type: string
  enum: [average, weighted_average, median, categorical_count, accuracy]
# Scoring-parameter variant tagged type=basic. The `type` property references
# ScoringFnParamsType and narrows it with a sibling `const`.
BasicScoringFnParams:
  title: BasicScoringFnParams
  description: Parameters for basic scoring function configuration.
  type: object
  properties:
    type:
      $ref: '#/components/schemas/ScoringFnParamsType'
      const: basic
      default: basic
      description: The type of scoring function parameters, always basic
    aggregation_functions:
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
      description: Aggregation functions to apply to the scores of each row
  required: [type, aggregation_functions]
  additionalProperties: false
# Evaluation configuration: the candidate (a model/agent union discriminated
# on `type`), a map of ScoringFnParams, and an optional example cap.
BenchmarkConfig:
  title: BenchmarkConfig
  description: A benchmark configuration for evaluation.
  type: object
  properties:
    eval_candidate:
      oneOf:
        - $ref: '#/components/schemas/ModelCandidate'
        - $ref: '#/components/schemas/AgentCandidate'
      discriminator:
        propertyName: type
        mapping:
          model: '#/components/schemas/ModelCandidate'
          agent: '#/components/schemas/AgentCandidate'
      description: The candidate to evaluate.
    scoring_params:
      type: object
      additionalProperties:
        $ref: '#/components/schemas/ScoringFnParams'
      description: >-
        Map between scoring function id and parameters for each scoring function
        you want to run
    num_examples:
      type: integer
      description: >-
        (Optional) The number of examples to evaluate. If not provided, all examples
        in the dataset will be evaluated
  required: [eval_candidate, scoring_params]
  additionalProperties: false
# Scoring-parameter variant tagged type=llm_as_judge. prompt_template is the
# only property that is not required.
LLMAsJudgeScoringFnParams:
  title: LLMAsJudgeScoringFnParams
  description: Parameters for LLM-as-judge scoring function configuration.
  type: object
  properties:
    type:
      $ref: '#/components/schemas/ScoringFnParamsType'
      const: llm_as_judge
      default: llm_as_judge
      description: The type of scoring function parameters, always llm_as_judge
    judge_model:
      type: string
      description: Identifier of the LLM model to use as a judge for scoring
    prompt_template:
      type: string
      description: (Optional) Custom prompt template for the judge model
    judge_score_regexes:
      type: array
      items:
        type: string
      description: Regexes to extract the answer from generated response
    aggregation_functions:
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
      description: Aggregation functions to apply to the scores of each row
  required: [type, judge_model, judge_score_regexes, aggregation_functions]
  additionalProperties: false
# Evaluation candidate tagged type=model. sampling_params is required;
# system_message is optional.
ModelCandidate:
  title: ModelCandidate
  description: A model candidate for evaluation.
  type: object
  properties:
    type:
      type: string
      const: model
      default: model
    model:
      type: string
      description: The model ID to evaluate.
    sampling_params:
      $ref: '#/components/schemas/SamplingParams'
      description: The sampling parameters for the model.
    system_message:
      $ref: '#/components/schemas/SystemMessage'
      description: >-
        (Optional) The system message providing instructions or context to the
        model.
  required: [type, model, sampling_params]
  additionalProperties: false
# Scoring-parameter variant tagged type=regex_parser.
RegexParserScoringFnParams:
  title: RegexParserScoringFnParams
  description: Parameters for regex parser scoring function configuration.
  type: object
  properties:
    type:
      $ref: '#/components/schemas/ScoringFnParamsType'
      const: regex_parser
      default: regex_parser
      description: The type of scoring function parameters, always regex_parser
    parsing_regexes:
      type: array
      items:
        type: string
      description: Regex to extract the answer from generated response
    aggregation_functions:
      type: array
      items:
        $ref: '#/components/schemas/AggregationFunctionType'
      description: Aggregation functions to apply to the scores of each row
  required: [type, parsing_regexes, aggregation_functions]
  additionalProperties: false
# Tagged union of the three scoring-parameter variants, discriminated on the
# `type` property.
ScoringFnParams:
  oneOf:
    - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
    - $ref: '#/components/schemas/RegexParserScoringFnParams'
    - $ref: '#/components/schemas/BasicScoringFnParams'
  discriminator:
    propertyName: type
    mapping:
      llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
      regex_parser: '#/components/schemas/RegexParserScoringFnParams'
      basic: '#/components/schemas/BasicScoringFnParams'
# String enum of the discriminator values used by the ScoringFnParams union.
ScoringFnParamsType:
  title: ScoringFnParamsType
  description: Types of scoring function parameter configurations.
  type: string
  enum: [llm_as_judge, regex_parser, basic]
# Message with role pinned to "system" and InterleavedContent as its body.
SystemMessage:
  title: SystemMessage
  description: A system message providing instructions or context to the model.
  type: object
  properties:
    role:
      type: string
      const: system
      default: system
      description: Must be "system" to identify this as a system message
    content:
      $ref: '#/components/schemas/InterleavedContent'
      description: >-
        The content of the "system prompt". If multiple system messages are provided,
        they are concatenated. The underlying Llama Stack code may also add other
        system messages (for example, for formatting tool definitions).
  required: [role, content]
  additionalProperties: false
# Request body for evaluating explicit rows. Each input row is a free-form
# object; all three properties are required.
EvaluateRowsRequest:
  title: EvaluateRowsRequest
  type: object
  properties:
    input_rows:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: The rows to evaluate.
    scoring_functions:
      type: array
      items:
        type: string
      description: The scoring functions to use for the evaluation.
    benchmark_config:
      $ref: '#/components/schemas/BenchmarkConfig'
      description: The configuration for the benchmark.
  required: [input_rows, scoring_functions, benchmark_config]
  additionalProperties: false
# Evaluation output: a list of free-form generation objects and a map whose
# values are ScoringResult objects.
EvaluateResponse:
  title: EvaluateResponse
  description: The response from an evaluation.
  type: object
  properties:
    generations:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: The generations from the evaluation.
    scores:
      type: object
      additionalProperties:
        $ref: '#/components/schemas/ScoringResult'
      description: The scores from the evaluation.
  required: [generations, scores]
  additionalProperties: false
# Scoring output. The schema-level description previously read "for a single
# row", which contradicted the properties: score_rows holds one entry per row
# and aggregated_results holds the metrics aggregated over them.
ScoringResult:
  title: ScoringResult
  description: >-
    A scoring result containing the score for each row together with the
    aggregated metric values.
  type: object
  properties:
    score_rows:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: >-
        The scoring result for each row. Each row is a map of column name to value.
    aggregated_results:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: Map of metric name to aggregated value
  required: [score_rows, aggregated_results]
  additionalProperties: false
# Request body for running an evaluation: a single required BenchmarkConfig.
RunEvalRequest:
  title: RunEvalRequest
  type: object
  properties:
    benchmark_config:
      $ref: '#/components/schemas/BenchmarkConfig'
      description: The configuration for the benchmark.
  required: [benchmark_config]
  additionalProperties: false
# Job handle: an id plus one of five status values.
Job:
  title: Job
  description: A job execution instance with status tracking.
  type: object
  properties:
    job_id:
      type: string
      description: Unique identifier for the job
    status:
      type: string
      enum: [completed, in_progress, failed, scheduled, cancelled]
      description: Current execution status of the job
  required: [job_id, status]
  additionalProperties: false
# Content part tagged type=image_url; wraps an OpenAIImageURL.
OpenAIChatCompletionContentPartImageParam:
  title: OpenAIChatCompletionContentPartImageParam
  description: >-
    Image content part for OpenAI-compatible chat completion messages.
  type: object
  properties:
    type:
      type: string
      const: image_url
      default: image_url
      description: Must be "image_url" to identify this as image content
    image_url:
      $ref: '#/components/schemas/OpenAIImageURL'
      description: Image URL specification and processing details
  required: [type, image_url]
  additionalProperties: false
# Content part tagged type=text.
OpenAIChatCompletionContentPartTextParam:
  title: OpenAIChatCompletionContentPartTextParam
  description: >-
    Text content part for OpenAI-compatible chat completion messages.
  type: object
  properties:
    type:
      type: string
      const: text
      default: text
      description: Must be "text" to identify this as text content
    text:
      type: string
      description: The text content of the message
  required: [type, text]
  additionalProperties: false
# Image reference: a required url and an optional detail hint.
# NOTE(review): the description of `detail` names three values, but the
# schema does not restrict it with an enum.
OpenAIImageURL:
  title: OpenAIImageURL
  description: >-
    Image URL specification for OpenAI-compatible chat completion messages.
  type: object
  properties:
    url:
      type: string
      description: URL of the image to include in the message
    detail:
      type: string
      description: >-
        (Optional) Level of detail for image processing. Can be "low", "high",
        or "auto"
  required: [url]
  additionalProperties: false
# Request body for reranking. `query` and each element of `items` accept the
# same three shapes: a plain string, a text content part, or an image
# content part.
RerankRequest:
  title: RerankRequest
  type: object
  properties:
    model:
      type: string
      description: The identifier of the reranking model to use.
    query:
      oneOf:
        - type: string
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
      description: >-
        The search query to rank items against. Can be a string, text content
        part, or image content part. The input must not exceed the model's max
        input token length.
    # The outer `items` is the property name; the inner `items` is the JSON
    # Schema keyword describing the array elements.
    items:
      type: array
      items:
        oneOf:
          - type: string
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
          - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
      description: >-
        List of items to rerank. Each item can be a string, text content part,
        or image content part. Each input must not exceed the model's max input
        token length.
    max_num_results:
      type: integer
      description: >-
        (Optional) Maximum number of results to return. Default: returns all.
  required: [model, query, items]
  additionalProperties: false
# One rerank hit: the position in the input list plus its relevance score.
RerankData:
  title: RerankData
  description: A single rerank result from a reranking response.
  type: object
  properties:
    index:
      type: integer
      description: The original index of the document in the input list
    relevance_score:
      type: number
      description: >-
        The relevance score from the model output. Values are inverted when applicable
        so that higher scores indicate greater relevance.
  required: [index, relevance_score]
  additionalProperties: false
# List wrapper: `data` is an array of RerankData objects.
RerankResponse:
  title: RerankResponse
  description: Response from a reranking request.
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/components/schemas/RerankData'
      description: >-
        List of rerank result objects, sorted by relevance score (descending)
  required: [data]
  additionalProperties: false
# Training checkpoint record. Every property except training_metrics is
# required.
Checkpoint:
  title: Checkpoint
  description: Checkpoint created during training runs.
  type: object
  properties:
    identifier:
      type: string
      description: Unique identifier for the checkpoint
    created_at:
      type: string
      format: date-time
      description: Timestamp when the checkpoint was created
    epoch:
      type: integer
      description: Training epoch when the checkpoint was saved
    post_training_job_id:
      type: string
      description: Identifier of the training job that created this checkpoint
    path:
      type: string
      description: File system path where the checkpoint is stored
    training_metrics:
      $ref: '#/components/schemas/PostTrainingMetric'
      description: (Optional) Training metrics associated with this checkpoint
  required: [identifier, created_at, epoch, post_training_job_id, path]
  additionalProperties: false
# Artifacts for one training job: the job id and its Checkpoint list.
PostTrainingJobArtifactsResponse:
  title: PostTrainingJobArtifactsResponse
  description: Artifacts of a finetuning job.
  type: object
  properties:
    job_uuid:
      type: string
      description: Unique identifier for the training job
    checkpoints:
      type: array
      items:
        $ref: '#/components/schemas/Checkpoint'
      description: List of model checkpoints created during training
  required: [job_uuid, checkpoints]
  additionalProperties: false
# Metrics for one training epoch; all four properties are required.
PostTrainingMetric:
  title: PostTrainingMetric
  description: Training metrics captured during post-training jobs.
  type: object
  properties:
    epoch:
      type: integer
      description: Training epoch number
    train_loss:
      type: number
      description: Loss value on the training dataset
    validation_loss:
      type: number
      description: Loss value on the validation dataset
    perplexity:
      type: number
      description: Perplexity metric indicating model confidence
  required: [epoch, train_loss, validation_loss, perplexity]
  additionalProperties: false
# Request body for cancelling a training job: a single required job_uuid.
CancelTrainingJobRequest:
  title: CancelTrainingJobRequest
  type: object
  properties:
    job_uuid:
      type: string
      description: The UUID of the job to cancel.
  required: [job_uuid]
  additionalProperties: false
# Status report for a training job. The three timestamps and
# resources_allocated are optional; job_uuid, status and checkpoints are
# required.
PostTrainingJobStatusResponse:
  title: PostTrainingJobStatusResponse
  description: Status of a finetuning job.
  type: object
  properties:
    job_uuid:
      type: string
      description: Unique identifier for the training job
    status:
      type: string
      enum: [completed, in_progress, failed, scheduled, cancelled]
      description: Current status of the training job
    scheduled_at:
      type: string
      format: date-time
      description: (Optional) Timestamp when the job was scheduled
    started_at:
      type: string
      format: date-time
      description: (Optional) Timestamp when the job execution began
    completed_at:
      type: string
      format: date-time
      description: (Optional) Timestamp when the job finished, if completed
    # Free-form map: each value may be any JSON type.
    resources_allocated:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        (Optional) Information about computational resources allocated to the
        job
    checkpoints:
      type: array
      items:
        $ref: '#/components/schemas/Checkpoint'
      description: List of model checkpoints created during training
  required: [job_uuid, status, checkpoints]
  additionalProperties: false
# List wrapper: `data` is an array of PostTrainingJob objects. The element
# schema is referenced from components instead of being re-declared inline,
# so the list item and the standalone PostTrainingJob schema cannot drift
# apart.
ListPostTrainingJobsResponse:
  title: ListPostTrainingJobsResponse
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/components/schemas/PostTrainingJob'
  required: [data]
  additionalProperties: false
# DPO settings: beta plus a loss type (a DPOLossType reference with a sibling
# default of sigmoid). Both are required.
DPOAlignmentConfig:
  title: DPOAlignmentConfig
  description: Configuration for Direct Preference Optimization (DPO) alignment.
  type: object
  properties:
    beta:
      type: number
      description: Temperature parameter for the DPO loss
    loss_type:
      $ref: '#/components/schemas/DPOLossType'
      default: sigmoid
      description: The type of loss function to use for DPO
  required: [beta, loss_type]
  additionalProperties: false
# String enum referenced by DPOAlignmentConfig.loss_type.
DPOLossType:
  title: DPOLossType
  type: string
  enum: [sigmoid, hinge, ipo, kto_pair]
# Training-data settings. dataset_id, batch_size, shuffle and data_format are
# required; the optional booleans default to false.
DataConfig:
  title: DataConfig
  description: Configuration for training data and data loading.
  type: object
  properties:
    dataset_id:
      type: string
      description: Unique identifier for the training dataset
    batch_size:
      type: integer
      description: Number of samples per training batch
    shuffle:
      type: boolean
      description: Whether to shuffle the dataset during training
    data_format:
      $ref: '#/components/schemas/DatasetFormat'
      description: Format of the dataset (instruct or dialog)
    validation_dataset_id:
      type: string
      description: (Optional) Unique identifier for the validation dataset
    packed:
      type: boolean
      default: false
      description: >-
        (Optional) Whether to pack multiple samples into a single sequence for
        efficiency
    train_on_input:
      type: boolean
      default: false
      description: >-
        (Optional) Whether to compute loss on input tokens as well as output tokens
  required: [dataset_id, batch_size, shuffle, data_format]
  additionalProperties: false
# String enum referenced by DataConfig.data_format.
DatasetFormat:
  title: DatasetFormat
  description: Format of the training dataset.
  type: string
  enum: [instruct, dialog]
# Four optional boolean switches, each defaulting to false; nothing is
# required.
EfficiencyConfig:
  title: EfficiencyConfig
  description: Configuration for memory and compute efficiency optimizations.
  type: object
  properties:
    enable_activation_checkpointing:
      type: boolean
      default: false
      description: >-
        (Optional) Whether to use activation checkpointing to reduce memory usage
    enable_activation_offloading:
      type: boolean
      default: false
      description: >-
        (Optional) Whether to offload activations to CPU to save GPU memory
    memory_efficient_fsdp_wrap:
      type: boolean
      default: false
      description: (Optional) Whether to use memory-efficient FSDP wrapping
    fsdp_cpu_offload:
      type: boolean
      default: false
      description: (Optional) Whether to offload FSDP parameters to CPU
  additionalProperties: false
# Optimizer settings; all four properties are required.
OptimizerConfig:
  title: OptimizerConfig
  description: Configuration parameters for the optimization algorithm.
  type: object
  properties:
    optimizer_type:
      $ref: '#/components/schemas/OptimizerType'
      description: Type of optimizer to use (adam, adamw, or sgd)
    lr:
      type: number
      description: Learning rate for the optimizer
    weight_decay:
      type: number
      description: Weight decay coefficient for regularization
    num_warmup_steps:
      type: integer
      description: Number of steps for learning rate warmup
  required: [optimizer_type, lr, weight_decay, num_warmup_steps]
  additionalProperties: false
# String enum referenced by OptimizerConfig.optimizer_type.
OptimizerType:
  title: OptimizerType
  description: Available optimizer algorithms for training.
  type: string
  enum: [adam, adamw, sgd]
# Top-level training settings. Only n_epochs, max_steps_per_epoch and
# gradient_accumulation_steps are required; the nested config references are
# optional.
# NOTE(review): the description of `dtype` names three values, but the schema
# does not restrict it with an enum.
TrainingConfig:
  title: TrainingConfig
  description: Comprehensive configuration for the training process.
  type: object
  properties:
    n_epochs:
      type: integer
      description: Number of training epochs to run
    max_steps_per_epoch:
      type: integer
      default: 1
      description: Maximum number of steps to run per epoch
    gradient_accumulation_steps:
      type: integer
      default: 1
      description: Number of steps to accumulate gradients before updating
    max_validation_steps:
      type: integer
      default: 1
      description: (Optional) Maximum number of validation steps per epoch
    data_config:
      $ref: '#/components/schemas/DataConfig'
      description: (Optional) Configuration for data loading and formatting
    optimizer_config:
      $ref: '#/components/schemas/OptimizerConfig'
      description: (Optional) Configuration for the optimization algorithm
    efficiency_config:
      $ref: '#/components/schemas/EfficiencyConfig'
      description: (Optional) Configuration for memory and compute optimizations
    dtype:
      type: string
      default: bf16
      description: (Optional) Data type for model parameters (bf16, fp16, fp32)
  required: [n_epochs, max_steps_per_epoch, gradient_accumulation_steps]
  additionalProperties: false
# Request body for a preference-optimization job; all six properties are
# required. The two *_config maps at the end are free-form.
PreferenceOptimizeRequest:
  title: PreferenceOptimizeRequest
  type: object
  properties:
    job_uuid:
      type: string
      description: The UUID of the job to create.
    finetuned_model:
      type: string
      description: The model to fine-tune.
    algorithm_config:
      $ref: '#/components/schemas/DPOAlignmentConfig'
      description: The algorithm configuration.
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
      description: The training configuration.
    hyperparam_search_config:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: The hyperparam search configuration.
    logger_config:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: The logger configuration.
  required:
    - job_uuid
    - finetuned_model
    - algorithm_config
    - training_config
    - hyperparam_search_config
    - logger_config
  additionalProperties: false
# Handle for a post-training job: an object with a single required job_uuid.
PostTrainingJob:
  title: PostTrainingJob
  type: object
  properties:
    job_uuid:
      type: string
  required: [job_uuid]
  additionalProperties: false
# Tagged union of the fine-tuning algorithm configs, discriminated on `type`.
# The tag values are the mixed-case strings LoRA and QAT.
AlgorithmConfig:
  oneOf:
    - $ref: '#/components/schemas/LoraFinetuningConfig'
    - $ref: '#/components/schemas/QATFinetuningConfig'
  discriminator:
    propertyName: type
    mapping:
      LoRA: '#/components/schemas/LoraFinetuningConfig'
      QAT: '#/components/schemas/QATFinetuningConfig'
# Algorithm config tagged type=LoRA. use_dora and quantize_base are optional
# and default to false; everything else is required.
LoraFinetuningConfig:
  title: LoraFinetuningConfig
  description: Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
  type: object
  properties:
    type:
      type: string
      const: LoRA
      default: LoRA
      description: Algorithm type identifier, always "LoRA"
    lora_attn_modules:
      type: array
      items:
        type: string
      description: List of attention module names to apply LoRA to
    apply_lora_to_mlp:
      type: boolean
      description: Whether to apply LoRA to MLP layers
    apply_lora_to_output:
      type: boolean
      description: Whether to apply LoRA to output projection layers
    rank:
      type: integer
      description: Rank of the LoRA adaptation (lower rank = fewer parameters)
    alpha:
      type: integer
      description: LoRA scaling parameter that controls adaptation strength
    use_dora:
      type: boolean
      default: false
      description: >-
        (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
    quantize_base:
      type: boolean
      default: false
      description: (Optional) Whether to quantize the base model weights
  required:
    - type
    - lora_attn_modules
    - apply_lora_to_mlp
    - apply_lora_to_output
    - rank
    - alpha
  additionalProperties: false
# Algorithm config tagged type=QAT; all three properties are required.
QATFinetuningConfig:
  title: QATFinetuningConfig
  description: Configuration for Quantization-Aware Training (QAT) fine-tuning.
  type: object
  properties:
    type:
      type: string
      const: QAT
      default: QAT
      description: Algorithm type identifier, always "QAT"
    quantizer_name:
      type: string
      description: Name of the quantization algorithm to use
    group_size:
      type: integer
      description: Size of groups for grouped quantization
  required: [type, quantizer_name, group_size]
  additionalProperties: false
# Request body for a supervised fine-tuning job. model, checkpoint_dir and
# algorithm_config are not in the required list.
SupervisedFineTuneRequest:
  title: SupervisedFineTuneRequest
  type: object
  properties:
    job_uuid:
      type: string
      description: The UUID of the job to create.
    training_config:
      $ref: '#/components/schemas/TrainingConfig'
      description: The training configuration.
    hyperparam_search_config:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: The hyperparam search configuration.
    logger_config:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: The logger configuration.
    model:
      type: string
      description: The model to fine-tune.
    checkpoint_dir:
      type: string
      description: The directory to save checkpoint(s) to.
    algorithm_config:
      $ref: '#/components/schemas/AlgorithmConfig'
      description: The algorithm configuration.
  required:
    - job_uuid
    - training_config
    - hyperparam_search_config
    - logger_config
  additionalProperties: false
# Request body for a metrics query. Only start_time and query_type are
# required. Each label_matchers element is an inline object schema titled
# MetricLabelMatcher.
QueryMetricsRequest:
  title: QueryMetricsRequest
  type: object
  properties:
    start_time:
      type: integer
      description: The start time of the metric to query.
    end_time:
      type: integer
      description: The end time of the metric to query.
    granularity:
      type: string
      description: The granularity of the metric to query.
    query_type:
      type: string
      enum: [range, instant]
      description: The type of query to perform.
    label_matchers:
      type: array
      items:
        title: MetricLabelMatcher
        description: A matcher for filtering metrics by label values.
        type: object
        properties:
          name:
            type: string
            description: The name of the label to match
          value:
            type: string
            description: The value to match against
          operator:
            type: string
            # Every operator is quoted: a leading '!' would otherwise start a
            # YAML tag, and quoting the rest keeps the list uniform.
            enum: ['=', '!=', '=~', '!~']
            description: The comparison operator to use for matching
            default: '='
        required: [name, value, operator]
        additionalProperties: false
      description: The label matchers to apply to the metric.
  required: [start_time, query_type]
  additionalProperties: false
# One sample of a metric series; all three properties are required.
MetricDataPoint:
  title: MetricDataPoint
  description: A single data point in a metric time series.
  type: object
  properties:
    timestamp:
      type: integer
      description: Unix timestamp when the metric value was recorded
    value:
      type: number
      description: The numeric value of the metric at this timestamp
    # NOTE(review): no description in the spec; presumably the unit of
    # measurement for `value` — confirm.
    unit:
      type: string
  required: [timestamp, value, unit]
  additionalProperties: false
# Name/value string pair referenced by MetricSeries.labels.
MetricLabel:
  title: MetricLabel
  description: A label associated with a metric.
  type: object
  properties:
    name:
      type: string
      description: The name of the label
    value:
      type: string
      description: The value of the label
  required: [name, value]
  additionalProperties: false
# A named metric with its MetricLabel list and MetricDataPoint list; all
# three properties are required.
MetricSeries:
  title: MetricSeries
  description: A time series of metric data points.
  type: object
  properties:
    metric:
      type: string
      description: The name of the metric
    labels:
      type: array
      items:
        $ref: '#/components/schemas/MetricLabel'
      description: List of labels associated with this metric series
    values:
      type: array
      items:
        $ref: '#/components/schemas/MetricDataPoint'
      description: List of data points in chronological order
  required: [metric, labels, values]
  additionalProperties: false
# List wrapper: `data` is an array of MetricSeries objects.
QueryMetricsResponse:
  title: QueryMetricsResponse
  description: Response containing metric time series data.
  type: object
  properties:
    data:
      type: array
      items:
        $ref: '#/components/schemas/MetricSeries'
      description: List of metric series matching the query criteria
  required: [data]
  additionalProperties: false
# One filter clause: key, operator (QueryConditionOp) and a comparison value
# of any JSON type. All three are required; null is an accepted value type.
QueryCondition:
  title: QueryCondition
  description: A condition for filtering query results.
  type: object
  properties:
    key:
      type: string
      description: The attribute key to filter on
    op:
      $ref: '#/components/schemas/QueryConditionOp'
      description: The comparison operator to apply
    value:
      oneOf:
        - type: 'null'
        - type: boolean
        - type: number
        - type: string
        - type: array
        - type: object
      description: The value to compare against
  required: [key, op, value]
  additionalProperties: false
# String enum referenced by QueryCondition.op.
QueryConditionOp:
  title: QueryConditionOp
  description: Comparison operators for query conditions.
  type: string
  enum: [eq, ne, gt, lt]
# Request body for querying spans. The two arrays are required; max_depth is
# optional.
QuerySpansRequest:
  title: QuerySpansRequest
  type: object
  properties:
    attribute_filters:
      type: array
      items:
        $ref: '#/components/schemas/QueryCondition'
      description: The attribute filters to apply to the spans.
    attributes_to_return:
      type: array
      items:
        type: string
      description: The attributes to return in the spans.
    max_depth:
      type: integer
      description: The maximum depth of the tree.
  required: [attribute_filters, attributes_to_return]
  additionalProperties: false
# Trace span. parent_span_id, end_time and attributes are optional; the
# remaining four properties are required.
Span:
  title: Span
  description: A span representing a single operation within a trace.
  type: object
  properties:
    span_id:
      type: string
      description: Unique identifier for the span
    trace_id:
      type: string
      description: Unique identifier for the trace this span belongs to
    parent_span_id:
      type: string
      description: >-
        (Optional) Unique identifier for the parent span, if this is a child span
    name:
      type: string
      description: >-
        Human-readable name describing the operation this span represents
    start_time:
      type: string
      format: date-time
      description: Timestamp when the operation began
    end_time:
      type: string
      format: date-time
      description: >-
        (Optional) Timestamp when the operation finished, if completed
    # Free-form map: each value may be any JSON type.
    attributes:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: >-
        (Optional) Key-value pairs containing additional metadata about the span
  required: [span_id, trace_id, name, start_time]
  additionalProperties: false
QuerySpansResponse:
  # Envelope with a single mandatory `data` array of Span objects.
  title: QuerySpansResponse
  description: Response containing a list of spans.
  type: object
  required:
    - data
  additionalProperties: false
  properties:
    data:
      description: List of spans matching the query criteria
      type: array
      items:
        $ref: '#/components/schemas/Span'
SaveSpansToDatasetRequest:
  # Filters, attribute list and target dataset are required; `max_depth` is
  # the only optional field.
  type: object
  properties:
    attribute_filters:
      type: array
      items:
        $ref: '#/components/schemas/QueryCondition'
      description: >-
        The attribute filters to apply to the spans.
    attributes_to_save:
      type: array
      items:
        type: string
      description: The attributes to save to the dataset.
    dataset_id:
      type: string
      description: >-
        The ID of the dataset to save the spans to.
    max_depth:
      type: integer
      description: The maximum depth of the tree.
  additionalProperties: false
  required:
    - attribute_filters
    - attributes_to_save
    - dataset_id
  title: SaveSpansToDatasetRequest
  # Schema-level description added so this schema is documented like its
  # sibling schemas in generated reference pages.
  description: >-
    Request to save spans that match a set of attribute filters to a dataset.
GetSpanTreeRequest:
  # No `required` list: both properties are optional.
  type: object
  properties:
    attributes_to_return:
      type: array
      items:
        type: string
      description: The attributes to return in the tree.
    max_depth:
      type: integer
      description: The maximum depth of the tree.
  additionalProperties: false
  title: GetSpanTreeRequest
  # Schema-level description added so this schema is documented like its
  # sibling schemas in generated reference pages.
  description: >-
    Request to retrieve a span tree, optionally limiting the returned
    attributes and the tree depth.
SpanStatus:
  # Two-valued string enum referenced by SpanWithStatus.status.
  title: SpanStatus
  description: >-
    The status of a span indicating whether it completed successfully
    or with an error.
  type: string
  enum: [ok, error]
SpanWithStatus:
  # Carries the same properties as the Span schema plus an optional `status`.
  # The required set is unchanged: span_id, trace_id, name, start_time.
  title: SpanWithStatus
  description: A span that includes status information.
  type: object
  required:
    - span_id
    - trace_id
    - name
    - start_time
  additionalProperties: false
  properties:
    span_id:
      description: Unique identifier for the span
      type: string
    trace_id:
      description: Unique identifier for the trace this span belongs to
      type: string
    parent_span_id:
      description: >-
        (Optional) Unique identifier for the parent span, if this is a child
        span
      type: string
    name:
      description: >-
        Human-readable name describing the operation this span
        represents
      type: string
    start_time:
      description: Timestamp when the operation began
      type: string
      format: date-time
    end_time:
      description: '(Optional) Timestamp when the operation finished, if completed'
      type: string
      format: date-time
    attributes:
      description: >-
        (Optional) Key-value pairs containing additional metadata about the
        span
      type: object
      # Attribute values may be any JSON value type.
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
    status:
      description: (Optional) The current status of the span
      $ref: '#/components/schemas/SpanStatus'
QuerySpanTreeResponse:
  # `data` is a map with free-form keys whose values are SpanWithStatus
  # objects. It is the only field and is mandatory.
  title: QuerySpanTreeResponse
  description: Response containing a tree structure of spans.
  type: object
  required:
    - data
  additionalProperties: false
  properties:
    data:
      description: Dictionary mapping span IDs to spans with status information
      type: object
      additionalProperties:
        $ref: '#/components/schemas/SpanWithStatus'
QueryTracesRequest:
  # No `required` list: every property is optional.
  type: object
  properties:
    attribute_filters:
      type: array
      items:
        $ref: '#/components/schemas/QueryCondition'
      description: >-
        The attribute filters to apply to the traces.
    limit:
      type: integer
      description: The limit of traces to return.
    offset:
      type: integer
      description: The offset of the traces to return.
    order_by:
      type: array
      items:
        type: string
      description: The order by of the traces to return.
  additionalProperties: false
  title: QueryTracesRequest
  # Schema-level description added so this schema is documented like its
  # sibling schemas in generated reference pages.
  description: >-
    Request to query traces, with optional attribute filters, limit, offset
    and ordering.
Trace:
  # Mandatory: trace_id, root_span_id, start_time. `end_time` may be omitted.
  title: Trace
  description: >-
    A trace representing the complete execution path of a request
    across multiple operations.
  type: object
  required:
    - trace_id
    - root_span_id
    - start_time
  additionalProperties: false
  properties:
    trace_id:
      description: Unique identifier for the trace
      type: string
    root_span_id:
      description: Unique identifier for the root span that started this trace
      type: string
    start_time:
      description: Timestamp when the trace began
      type: string
      format: date-time
    end_time:
      description: '(Optional) Timestamp when the trace finished, if completed'
      type: string
      format: date-time
QueryTracesResponse:
  # Envelope with a single mandatory `data` array of Trace objects.
  title: QueryTracesResponse
  description: Response containing a list of traces.
  type: object
  required:
    - data
  additionalProperties: false
  properties:
    data:
      description: List of traces matching the query criteria
      type: array
      items:
        $ref: '#/components/schemas/Trace'
responses:
  # Reusable error responses. Each one returns an `Error` schema body as
  # application/json and carries an illustrative example payload.
  BadRequest400:
    description: The request was invalid or malformed
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 400
          title: Bad Request
          detail: The request was invalid or malformed
  TooManyRequests429:
    description: The client has sent too many requests in a given amount of time
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 429
          title: Too Many Requests
          detail: 'You have exceeded the rate limit. Please try again later.'
  InternalServerError500:
    description: The server encountered an unexpected error
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 500
          title: Internal Server Error
          detail: 'An unexpected error occurred. Our team has been notified.'
  DefaultError:
    description: An unexpected error occurred
    content:
      application/json:
        schema:
          $ref: '#/components/schemas/Error'
        example:
          status: 0
          title: Error
          detail: An unexpected error occurred
security:
  # Document-level security requirement: the `Default` scheme with an empty
  # scope list. The scheme itself is presumably declared under
  # components.securitySchemes, outside this excerpt (verify).
  - Default: []
tags:
  # Tag metadata for the documentation UI. `description` holds Markdown and
  # `x-displayName` is the vendor-extension label shown for the tag.
  - name: Agents
    # Literal block scalar (`|-`) with blank lines between paragraphs. A folded
    # scalar would join the heading, blockquote and every bullet into a single
    # line and break the Markdown rendering.
    description: |-
      APIs for creating and interacting with agentic systems.

      ## Agents API (Experimental)

      > **🧪 EXPERIMENTAL**: This API is in preview and may change based on user feedback.
      Great for exploring new capabilities and providing feedback to influence the
      final design.

      Main functionalities provided by this API:

      - Create agents with specific instructions and ability to use tools.
      - Interactions with agents are grouped into sessions ("threads"), and each interaction
        is called a "turn".
      - Agents can be provided with various tools (see the ToolGroups and ToolRuntime
        APIs for more details).
      - Agents can be provided with various shields (see the Safety API for more details).
      - Agents can also use Memory to retrieve information from knowledge bases. See
        the RAG Tool and Vector IO APIs for more details.

      ### 🧪 Feedback Welcome

      This API is actively being developed. We welcome feedback on:

      - API design and usability
      - Performance characteristics
      - Missing features or capabilities
      - Integration patterns

      **Provide Feedback**: [GitHub Discussions](https://github.com/llamastack/llama-stack/discussions)
      or [GitHub Issues](https://github.com/llamastack/llama-stack/issues)
    x-displayName: Agents
  - name: Benchmarks
    description: ''
  - name: DatasetIO
    description: ''
  - name: Datasets
    description: ''
  - name: Eval
    # The descriptive sentence previously sat in `x-displayName` while
    # `description` was empty. It now lives in `description`, and the display
    # label is the short tag name, matching the Agents entry.
    description: >-
      Llama Stack Evaluation API for running evaluations on model and agent
      candidates.
    x-displayName: Eval
  - name: PostTraining (Coming Soon)
    description: ''
  - name: Telemetry
    description: ''
x-tagGroups:
  # A single group named "Operations" that lists all seven tags declared in
  # the `tags` section, in the same order.
  - name: Operations
    tags:
      - Agents
      - Benchmarks
      - DatasetIO
      - Datasets
      - Eval
      - 'PostTraining (Coming Soon)'
      - Telemetry