Added elasticsearch in conftest

This commit is contained in:
Enrico Zimuel 2025-11-04 14:33:41 +01:00
commit 1478e672c8
No known key found for this signature in database
GPG key ID: 6CB203F6934A69F1
374 changed files with 52786 additions and 80518 deletions

View file

@ -23,5 +23,4 @@ A Llama Stack API is described as a collection of REST endpoints. We currently s
We are working on adding a few more APIs to complete the application lifecycle. These will include:
- **Batch Inference**: run inference on a dataset of inputs
- **Batch Agents**: run agents on a dataset of inputs
- **Synthetic Data Generation**: generate synthetic data for model development
- **Batches**: OpenAI-compatible batch management for inference

View file

@ -239,8 +239,13 @@ client = LlamaStackClient(base_url="http://localhost:8321")
models = client.models.list()
# Select the first LLM
llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
model_id = llm.identifier
llm = next(
m for m in models
if m.custom_metadata
and m.custom_metadata.get("model_type") == "llm"
and m.custom_metadata.get("provider_id") == "ollama"
)
model_id = llm.id
print("Model:", model_id)
@ -279,8 +284,13 @@ import uuid
client = LlamaStackClient(base_url=f"http://localhost:8321")
models = client.models.list()
llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
model_id = llm.identifier
llm = next(
m for m in models
if m.custom_metadata
and m.custom_metadata.get("model_type") == "llm"
and m.custom_metadata.get("provider_id") == "ollama"
)
model_id = llm.id
agent = Agent(client, model=model_id, instructions="You are a helpful assistant.")
@ -450,8 +460,11 @@ import uuid
client = LlamaStackClient(base_url="http://localhost:8321")
# Create a vector database instance
embed_lm = next(m for m in client.models.list() if m.model_type == "embedding")
embedding_model = embed_lm.identifier
embed_lm = next(
m for m in client.models.list()
if m.custom_metadata and m.custom_metadata.get("model_type") == "embedding"
)
embedding_model = embed_lm.id
vector_db_id = f"v{uuid.uuid4().hex}"
# The VectorDB API is deprecated; the server now returns its own authoritative ID.
# We capture the correct ID from the response's .identifier attribute.
@ -489,9 +502,11 @@ client.tool_runtime.rag_tool.insert(
llm = next(
m
for m in client.models.list()
if m.model_type == "llm" and m.provider_id == "ollama"
if m.custom_metadata
and m.custom_metadata.get("model_type") == "llm"
and m.custom_metadata.get("provider_id") == "ollama"
)
model = llm.identifier
model = llm.id
# Create the RAG agent
rag_agent = Agent(

View file

@ -84,7 +84,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
)
yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
html_filename = f"{filename_prefix}llama-stack-spec.html"
with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
y = yaml.YAML()
@ -102,11 +101,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
fp,
)
with open(output_dir / html_filename, "w") as fp:
spec.write_html(fp, pretty_print=True)
print(f"Generated {yaml_filename} and {html_filename}")
def main(output_dir: str):
output_dir = Path(output_dir)
if not output_dir.exists():

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1258,7 +1258,23 @@
],
"summary": "List routes.",
"description": "List routes.\nList all available API routes with their methods and implementing providers.",
"parameters": [],
"parameters": [
{
"name": "api_filter",
"in": "query",
"description": "Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.",
"required": false,
"schema": {
"type": "string",
"enum": [
"v1",
"v1alpha",
"v1beta",
"deprecated"
]
}
}
],
"deprecated": false
}
},
@ -2634,51 +2650,6 @@
"deprecated": false
}
},
"/v1/synthetic-data-generation/generate": {
"post": {
"responses": {
"200": {
"description": "Response containing filtered synthetic data samples and optional statistics",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SyntheticDataGenerationResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"SyntheticDataGeneration (Coming Soon)"
],
"summary": "Generate synthetic data based on input dialogs and apply filtering.",
"description": "Generate synthetic data based on input dialogs and apply filtering.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SyntheticDataGenerateRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/tool-runtime/invoke": {
"post": {
"responses": {
@ -11462,44 +11433,46 @@
],
"title": "RegisterShieldRequest"
},
"CompletionMessage": {
"InvokeToolRequest": {
"type": "object",
"properties": {
"role": {
"tool_name": {
"type": "string",
"const": "assistant",
"default": "assistant",
"description": "Must be \"assistant\" to identify this as the model's response"
"description": "The name of the tool to invoke."
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the model's response"
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`: The model finished generating the entire response. - `StopReason.end_of_message`: The model finished generating but generated a partial response -- usually, a tool call. The user may call the tool and continue the conversation with the tool's response. - `StopReason.out_of_tokens`: The model ran out of token budget."
},
"tool_calls": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolCall"
"kwargs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "List of tool calls. Each tool call is a ToolCall object."
"description": "A dictionary of arguments to pass to the tool."
}
},
"additionalProperties": false,
"required": [
"role",
"content",
"stop_reason"
"tool_name",
"kwargs"
],
"title": "CompletionMessage",
"description": "A message containing the model's (assistant) response in a chat conversation."
"title": "InvokeToolRequest"
},
"ImageContentItem": {
"type": "object",
@ -11568,53 +11541,6 @@
}
}
},
"Message": {
"oneOf": [
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/SystemMessage"
},
{
"$ref": "#/components/schemas/ToolResponseMessage"
},
{
"$ref": "#/components/schemas/CompletionMessage"
}
],
"discriminator": {
"propertyName": "role",
"mapping": {
"user": "#/components/schemas/UserMessage",
"system": "#/components/schemas/SystemMessage",
"tool": "#/components/schemas/ToolResponseMessage",
"assistant": "#/components/schemas/CompletionMessage"
}
}
},
"SystemMessage": {
"type": "object",
"properties": {
"role": {
"type": "string",
"const": "system",
"default": "system",
"description": "Must be \"system\" to identify this as a system message"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)."
}
},
"additionalProperties": false,
"required": [
"role",
"content"
],
"title": "SystemMessage",
"description": "A system message providing instructions or context to the model."
},
"TextContentItem": {
"type": "object",
"properties": {
@ -11637,250 +11563,6 @@
"title": "TextContentItem",
"description": "A text content item"
},
"ToolCall": {
"type": "object",
"properties": {
"call_id": {
"type": "string"
},
"tool_name": {
"oneOf": [
{
"type": "string",
"enum": [
"brave_search",
"wolfram_alpha",
"photogen",
"code_interpreter"
],
"title": "BuiltinTool"
},
{
"type": "string"
}
]
},
"arguments": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"call_id",
"tool_name",
"arguments"
],
"title": "ToolCall"
},
"ToolResponseMessage": {
"type": "object",
"properties": {
"role": {
"type": "string",
"const": "tool",
"default": "tool",
"description": "Must be \"tool\" to identify this as a tool response"
},
"call_id": {
"type": "string",
"description": "Unique identifier for the tool call this response is for"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The response content from the tool"
}
},
"additionalProperties": false,
"required": [
"role",
"call_id",
"content"
],
"title": "ToolResponseMessage",
"description": "A message representing the result of a tool invocation."
},
"URL": {
"type": "object",
"properties": {
"uri": {
"type": "string",
"description": "The URL string pointing to the resource"
}
},
"additionalProperties": false,
"required": [
"uri"
],
"title": "URL",
"description": "A URL reference to external content."
},
"UserMessage": {
"type": "object",
"properties": {
"role": {
"type": "string",
"const": "user",
"default": "user",
"description": "Must be \"user\" to identify this as a user message"
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the message, which can include text and other media"
},
"context": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "(Optional) This field is used internally by Llama Stack to pass RAG context. This field may be removed in the API in the future."
}
},
"additionalProperties": false,
"required": [
"role",
"content"
],
"title": "UserMessage",
"description": "A message from the user in a chat conversation."
},
"SyntheticDataGenerateRequest": {
"type": "object",
"properties": {
"dialogs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
},
"description": "List of conversation messages to use as input for synthetic data generation"
},
"filtering_function": {
"type": "string",
"enum": [
"none",
"random",
"top_k",
"top_p",
"top_k_top_p",
"sigmoid"
],
"description": "Type of filtering to apply to generated synthetic data samples"
},
"model": {
"type": "string",
"description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint"
}
},
"additionalProperties": false,
"required": [
"dialogs",
"filtering_function"
],
"title": "SyntheticDataGenerateRequest"
},
"SyntheticDataGenerationResponse": {
"type": "object",
"properties": {
"synthetic_data": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "List of generated synthetic data samples that passed the filtering criteria"
},
"statistics": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Statistical information about the generation process and filtering results"
}
},
"additionalProperties": false,
"required": [
"synthetic_data"
],
"title": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
},
"InvokeToolRequest": {
"type": "object",
"properties": {
"tool_name": {
"type": "string",
"description": "The name of the tool to invoke."
},
"kwargs": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "A dictionary of arguments to pass to the tool."
}
},
"additionalProperties": false,
"required": [
"tool_name",
"kwargs"
],
"title": "InvokeToolRequest"
},
"ToolInvocationResult": {
"type": "object",
"properties": {
@ -11927,6 +11609,21 @@
"title": "ToolInvocationResult",
"description": "Result of a tool invocation."
},
"URL": {
"type": "object",
"properties": {
"uri": {
"type": "string",
"description": "The URL string pointing to the resource"
}
},
"additionalProperties": false,
"required": [
"uri"
],
"title": "URL",
"description": "A URL reference to external content."
},
"ToolDef": {
"type": "object",
"properties": {
@ -13977,10 +13674,6 @@
"name": "Shields",
"description": ""
},
{
"name": "SyntheticDataGeneration (Coming Soon)",
"description": ""
},
{
"name": "ToolGroups",
"description": ""
@ -14011,7 +13704,6 @@
"Scoring",
"ScoringFunctions",
"Shields",
"SyntheticDataGeneration (Coming Soon)",
"ToolGroups",
"ToolRuntime",
"VectorIO"

View file

@ -953,17 +953,32 @@ paths:
List routes.
List all available API routes with their methods and implementing providers.
parameters: []
parameters:
- name: api_filter
in: query
description: >-
Optional filter to control which routes are returned. Can be an API level
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
or 'deprecated' to show deprecated routes across all levels. If not specified,
returns only non-deprecated v1 routes.
required: false
schema:
type: string
enum:
- v1
- v1alpha
- v1beta
- deprecated
deprecated: false
/v1/models:
get:
responses:
'200':
description: A ListModelsResponse.
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -976,8 +991,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List all models.
description: List all models.
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
post:
@ -1967,40 +1982,6 @@ paths:
schema:
type: string
deprecated: false
/v1/synthetic-data-generation/generate:
post:
responses:
'200':
description: >-
Response containing filtered synthetic data samples and optional statistics
content:
application/json:
schema:
$ref: '#/components/schemas/SyntheticDataGenerationResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- SyntheticDataGeneration (Coming Soon)
summary: >-
Generate synthetic data based on input dialogs and apply filtering.
description: >-
Generate synthetic data based on input dialogs and apply filtering.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke:
post:
responses:
@ -5604,6 +5585,88 @@ components:
title: ListRoutesResponse
description: >-
Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model:
type: object
properties:
@ -5661,57 +5724,6 @@ components:
title: Model
description: >-
A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest:
type: object
properties:
@ -8725,45 +8737,29 @@ components:
required:
- shield_id
title: RegisterShieldRequest
CompletionMessage:
InvokeToolRequest:
type: object
properties:
role:
tool_name:
type: string
const: assistant
default: assistant
description: The name of the tool to invoke.
kwargs:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
Must be "assistant" to identify this as the model's response
content:
$ref: '#/components/schemas/InterleavedContent'
description: The content of the model's response
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
The model finished generating the entire response. - `StopReason.end_of_message`:
The model finished generating but generated a partial response -- usually,
a tool call. The user may call the tool and continue the conversation
with the tool's response. - `StopReason.out_of_tokens`: The model ran
out of token budget.
tool_calls:
type: array
items:
$ref: '#/components/schemas/ToolCall'
description: >-
List of tool calls. Each tool call is a ToolCall object.
A dictionary of arguments to pass to the tool.
additionalProperties: false
required:
- role
- content
- stop_reason
title: CompletionMessage
description: >-
A message containing the model's (assistant) response in a chat conversation.
- tool_name
- kwargs
title: InvokeToolRequest
ImageContentItem:
type: object
properties:
@ -8810,41 +8806,6 @@ components:
mapping:
image: '#/components/schemas/ImageContentItem'
text: '#/components/schemas/TextContentItem'
Message:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
discriminator:
propertyName: role
mapping:
user: '#/components/schemas/UserMessage'
system: '#/components/schemas/SystemMessage'
tool: '#/components/schemas/ToolResponseMessage'
assistant: '#/components/schemas/CompletionMessage'
SystemMessage:
type: object
properties:
role:
type: string
const: system
default: system
description: >-
Must be "system" to identify this as a system message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
system messages (for example, for formatting tool definitions).
additionalProperties: false
required:
- role
- content
title: SystemMessage
description: >-
A system message providing instructions or context to the model.
TextContentItem:
type: object
properties:
@ -8863,179 +8824,6 @@ components:
- text
title: TextContentItem
description: A text content item
ToolCall:
type: object
properties:
call_id:
type: string
tool_name:
oneOf:
- type: string
enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
title: BuiltinTool
- type: string
arguments:
type: string
additionalProperties: false
required:
- call_id
- tool_name
- arguments
title: ToolCall
ToolResponseMessage:
type: object
properties:
role:
type: string
const: tool
default: tool
description: >-
Must be "tool" to identify this as a tool response
call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
additionalProperties: false
required:
- role
- call_id
- content
title: ToolResponseMessage
description: >-
A message representing the result of a tool invocation.
URL:
type: object
properties:
uri:
type: string
description: The URL string pointing to the resource
additionalProperties: false
required:
- uri
title: URL
description: A URL reference to external content.
UserMessage:
type: object
properties:
role:
type: string
const: user
default: user
description: >-
Must be "user" to identify this as a user message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the message, which can include text and other media
context:
$ref: '#/components/schemas/InterleavedContent'
description: >-
(Optional) This field is used internally by Llama Stack to pass RAG context.
This field may be removed in the API in the future.
additionalProperties: false
required:
- role
- content
title: UserMessage
description: >-
A message from the user in a chat conversation.
SyntheticDataGenerateRequest:
type: object
properties:
dialogs:
type: array
items:
$ref: '#/components/schemas/Message'
description: >-
List of conversation messages to use as input for synthetic data generation
filtering_function:
type: string
enum:
- none
- random
- top_k
- top_p
- top_k_top_p
- sigmoid
description: >-
Type of filtering to apply to generated synthetic data samples
model:
type: string
description: >-
(Optional) The identifier of the model to use. The model must be registered
with Llama Stack and available via the /models endpoint
additionalProperties: false
required:
- dialogs
- filtering_function
title: SyntheticDataGenerateRequest
SyntheticDataGenerationResponse:
type: object
properties:
synthetic_data:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
List of generated synthetic data samples that passed the filtering criteria
statistics:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Statistical information about the generation process and filtering
results
additionalProperties: false
required:
- synthetic_data
title: SyntheticDataGenerationResponse
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
InvokeToolRequest:
type: object
properties:
tool_name:
type: string
description: The name of the tool to invoke.
kwargs:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
A dictionary of arguments to pass to the tool.
additionalProperties: false
required:
- tool_name
- kwargs
title: InvokeToolRequest
ToolInvocationResult:
type: object
properties:
@ -9066,6 +8854,17 @@ components:
additionalProperties: false
title: ToolInvocationResult
description: Result of a tool invocation.
URL:
type: object
properties:
uri:
type: string
description: The URL string pointing to the resource
additionalProperties: false
required:
- uri
title: URL
description: A URL reference to external content.
ToolDef:
type: object
properties:
@ -10667,8 +10466,6 @@ tags:
description: ''
- name: Shields
description: ''
- name: SyntheticDataGeneration (Coming Soon)
description: ''
- name: ToolGroups
description: ''
- name: ToolRuntime
@ -10691,7 +10488,6 @@ x-tagGroups:
- Scoring
- ScoringFunctions
- Shields
- SyntheticDataGeneration (Coming Soon)
- ToolGroups
- ToolRuntime
- VectorIO

File diff suppressed because it is too large Load diff

View file

@ -956,17 +956,32 @@ paths:
List routes.
List all available API routes with their methods and implementing providers.
parameters: []
parameters:
- name: api_filter
in: query
description: >-
Optional filter to control which routes are returned. Can be an API level
('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level,
or 'deprecated' to show deprecated routes across all levels. If not specified,
returns only non-deprecated v1 routes.
required: false
schema:
type: string
enum:
- v1
- v1alpha
- v1beta
- deprecated
deprecated: false
/v1/models:
get:
responses:
'200':
description: A ListModelsResponse.
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListModelsResponse'
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@ -979,8 +994,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List all models.
description: List all models.
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
deprecated: false
post:
@ -1970,40 +1985,6 @@ paths:
schema:
type: string
deprecated: false
/v1/synthetic-data-generation/generate:
post:
responses:
'200':
description: >-
Response containing filtered synthetic data samples and optional statistics
content:
application/json:
schema:
$ref: '#/components/schemas/SyntheticDataGenerationResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- SyntheticDataGeneration (Coming Soon)
summary: >-
Generate synthetic data based on input dialogs and apply filtering.
description: >-
Generate synthetic data based on input dialogs and apply filtering.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SyntheticDataGenerateRequest'
required: true
deprecated: false
/v1/tool-runtime/invoke:
post:
responses:
@ -6817,6 +6798,88 @@ components:
title: ListRoutesResponse
description: >-
Response containing a list of all available API routes.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
custom_metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
Model:
type: object
properties:
@ -6874,57 +6937,6 @@ components:
title: Model
description: >-
A model resource representing an AI model registered in Llama Stack.
ModelType:
type: string
enum:
- llm
- embedding
- rerank
title: ModelType
description: >-
Enumeration of supported model types in Llama Stack.
ListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Model'
additionalProperties: false
required:
- data
title: ListModelsResponse
RegisterModelRequest:
type: object
properties:
model_id:
type: string
description: The identifier of the model to register.
provider_model_id:
type: string
description: >-
The identifier of the model in the provider.
provider_id:
type: string
description: The identifier of the provider.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Any additional metadata for this model.
model_type:
$ref: '#/components/schemas/ModelType'
description: The type of model to register.
additionalProperties: false
required:
- model_id
title: RegisterModelRequest
RunModerationRequest:
type: object
properties:
@ -9938,45 +9950,29 @@ components:
required:
- shield_id
title: RegisterShieldRequest
CompletionMessage:
InvokeToolRequest:
type: object
properties:
role:
tool_name:
type: string
const: assistant
default: assistant
description: The name of the tool to invoke.
kwargs:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
Must be "assistant" to identify this as the model's response
content:
$ref: '#/components/schemas/InterleavedContent'
description: The content of the model's response
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
The model finished generating the entire response. - `StopReason.end_of_message`:
The model finished generating but generated a partial response -- usually,
a tool call. The user may call the tool and continue the conversation
with the tool's response. - `StopReason.out_of_tokens`: The model ran
out of token budget.
tool_calls:
type: array
items:
$ref: '#/components/schemas/ToolCall'
description: >-
List of tool calls. Each tool call is a ToolCall object.
A dictionary of arguments to pass to the tool.
additionalProperties: false
required:
- role
- content
- stop_reason
title: CompletionMessage
description: >-
A message containing the model's (assistant) response in a chat conversation.
- tool_name
- kwargs
title: InvokeToolRequest
ImageContentItem:
type: object
properties:
@ -10023,41 +10019,6 @@ components:
mapping:
image: '#/components/schemas/ImageContentItem'
text: '#/components/schemas/TextContentItem'
Message:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
discriminator:
propertyName: role
mapping:
user: '#/components/schemas/UserMessage'
system: '#/components/schemas/SystemMessage'
tool: '#/components/schemas/ToolResponseMessage'
assistant: '#/components/schemas/CompletionMessage'
SystemMessage:
type: object
properties:
role:
type: string
const: system
default: system
description: >-
Must be "system" to identify this as a system message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
system messages (for example, for formatting tool definitions).
additionalProperties: false
required:
- role
- content
title: SystemMessage
description: >-
A system message providing instructions or context to the model.
TextContentItem:
type: object
properties:
@ -10076,179 +10037,6 @@ components:
- text
title: TextContentItem
description: A text content item
ToolCall:
type: object
properties:
call_id:
type: string
tool_name:
oneOf:
- type: string
enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
title: BuiltinTool
- type: string
arguments:
type: string
additionalProperties: false
required:
- call_id
- tool_name
- arguments
title: ToolCall
ToolResponseMessage:
type: object
properties:
role:
type: string
const: tool
default: tool
description: >-
Must be "tool" to identify this as a tool response
call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
additionalProperties: false
required:
- role
- call_id
- content
title: ToolResponseMessage
description: >-
A message representing the result of a tool invocation.
URL:
type: object
properties:
uri:
type: string
description: The URL string pointing to the resource
additionalProperties: false
required:
- uri
title: URL
description: A URL reference to external content.
UserMessage:
type: object
properties:
role:
type: string
const: user
default: user
description: >-
Must be "user" to identify this as a user message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the message, which can include text and other media
context:
$ref: '#/components/schemas/InterleavedContent'
description: >-
(Optional) This field is used internally by Llama Stack to pass RAG context.
This field may be removed in the API in the future.
additionalProperties: false
required:
- role
- content
title: UserMessage
description: >-
A message from the user in a chat conversation.
SyntheticDataGenerateRequest:
type: object
properties:
dialogs:
type: array
items:
$ref: '#/components/schemas/Message'
description: >-
List of conversation messages to use as input for synthetic data generation
filtering_function:
type: string
enum:
- none
- random
- top_k
- top_p
- top_k_top_p
- sigmoid
description: >-
Type of filtering to apply to generated synthetic data samples
model:
type: string
description: >-
(Optional) The identifier of the model to use. The model must be registered
with Llama Stack and available via the /models endpoint
additionalProperties: false
required:
- dialogs
- filtering_function
title: SyntheticDataGenerateRequest
SyntheticDataGenerationResponse:
type: object
properties:
synthetic_data:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
List of generated synthetic data samples that passed the filtering criteria
statistics:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Statistical information about the generation process and filtering
results
additionalProperties: false
required:
- synthetic_data
title: SyntheticDataGenerationResponse
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
InvokeToolRequest:
type: object
properties:
tool_name:
type: string
description: The name of the tool to invoke.
kwargs:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
A dictionary of arguments to pass to the tool.
additionalProperties: false
required:
- tool_name
- kwargs
title: InvokeToolRequest
ToolInvocationResult:
type: object
properties:
@ -10279,6 +10067,17 @@ components:
additionalProperties: false
title: ToolInvocationResult
description: Result of a tool invocation.
URL:
type: object
properties:
uri:
type: string
description: The URL string pointing to the resource
additionalProperties: false
required:
- uri
title: URL
description: A URL reference to external content.
ToolDef:
type: object
properties:
@ -12311,6 +12110,45 @@ components:
title: AgentSessionCreateResponse
description: >-
Response returned when creating a new agent session.
CompletionMessage:
type: object
properties:
role:
type: string
const: assistant
default: assistant
description: >-
Must be "assistant" to identify this as the model's response
content:
$ref: '#/components/schemas/InterleavedContent'
description: The content of the model's response
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Reason why the model stopped generating. Options are: - `StopReason.end_of_turn`:
The model finished generating the entire response. - `StopReason.end_of_message`:
The model finished generating but generated a partial response -- usually,
a tool call. The user may call the tool and continue the conversation
with the tool's response. - `StopReason.out_of_tokens`: The model ran
out of token budget.
tool_calls:
type: array
items:
$ref: '#/components/schemas/ToolCall'
description: >-
List of tool calls. Each tool call is a ToolCall object.
additionalProperties: false
required:
- role
- content
- stop_reason
title: CompletionMessage
description: >-
A message containing the model's (assistant) response in a chat conversation.
InferenceStep:
type: object
properties:
@ -12463,6 +12301,29 @@ components:
- step_type
title: ShieldCallStep
description: A shield call step in an agent turn.
ToolCall:
type: object
properties:
call_id:
type: string
tool_name:
oneOf:
- type: string
enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
title: BuiltinTool
- type: string
arguments:
type: string
additionalProperties: false
required:
- call_id
- tool_name
- arguments
title: ToolCall
ToolExecutionStep:
type: object
properties:
@ -12550,6 +12411,30 @@ components:
- content
title: ToolResponse
description: Response from a tool invocation.
ToolResponseMessage:
type: object
properties:
role:
type: string
const: tool
default: tool
description: >-
Must be "tool" to identify this as a tool response
call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
content:
$ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
additionalProperties: false
required:
- role
- call_id
- content
title: ToolResponseMessage
description: >-
A message representing the result of a tool invocation.
Turn:
type: object
properties:
@ -12635,6 +12520,31 @@ components:
title: Turn
description: >-
A single turn in an interaction with an Agentic System.
UserMessage:
type: object
properties:
role:
type: string
const: user
default: user
description: >-
Must be "user" to identify this as a user message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the message, which can include text and other media
context:
$ref: '#/components/schemas/InterleavedContent'
description: >-
(Optional) This field is used internally by Llama Stack to pass RAG context.
This field may be removed in the API in the future.
additionalProperties: false
required:
- role
- content
title: UserMessage
description: >-
A message from the user in a chat conversation.
CreateAgentTurnRequest:
type: object
properties:
@ -13248,6 +13158,28 @@ components:
- sampling_params
title: ModelCandidate
description: A model candidate for evaluation.
SystemMessage:
type: object
properties:
role:
type: string
const: system
default: system
description: >-
Must be "system" to identify this as a system message
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
system messages (for example, for formatting tool definitions).
additionalProperties: false
required:
- role
- content
title: SystemMessage
description: >-
A system message providing instructions or context to the model.
EvaluateRowsRequest:
type: object
properties:
@ -14063,8 +13995,6 @@ tags:
description: ''
- name: Shields
description: ''
- name: SyntheticDataGeneration (Coming Soon)
description: ''
- name: ToolGroups
description: ''
- name: ToolRuntime
@ -14092,7 +14022,6 @@ x-tagGroups:
- Scoring
- ScoringFunctions
- Shields
- SyntheticDataGeneration (Coming Soon)
- ToolGroups
- ToolRuntime
- VectorIO