Merge branch 'main' into issue-3443-require_approval

This commit is contained in:
Matthew Farrellee 2025-09-30 12:10:13 -04:00
commit d2fdc70a8d
72 changed files with 1380 additions and 1406 deletions

View file

@ -7,7 +7,7 @@ sidebar_position: 1
### Server path
Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`.
Llama Stack exposes OpenAI-compatible API endpoints at `/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1`.
### Clients
@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321")
#### OpenAI Client
When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server.
When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server.
```python
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
```
Regardless of the client you choose, the following code examples should all work the same.

View file

@ -261,7 +261,7 @@ You can even run `llama model prompt-format` see all of the templates and their
```
llama model prompt-format -m Llama3.2-3B-Instruct
```
![alt text](../../../resources/prompt-format.png)
![alt text](/img/prompt-format.png)
You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.

View file

@ -217,7 +217,6 @@ from llama_stack_client.types import (
Methods:
- <code title="post /v1/inference/chat-completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">chat_completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_chat_completion_response.py">InferenceChatCompletionResponse</a></code>
- <code title="post /v1/inference/completion">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">completion</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_completion_response.py">InferenceCompletionResponse</a></code>
- <code title="post /v1/inference/embeddings">client.inference.<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/resources/inference.py">embeddings</a>(\*\*<a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/inference_embeddings_params.py">params</a>) -> <a href="https://github.com/meta-llama/llama-stack-client-python/tree/main/src/llama_stack_client/types/embeddings_response.py">EmbeddingsResponse</a></code>
## VectorIo

View file

@ -824,16 +824,10 @@
"\n",
"\n",
"user_input = \"Michael Jordan was born in 1963. He played basketball for the Chicago Bulls. He retired in 2003. Extract this information into JSON for me. \"\n",
"response = client.inference.completion(\n",
" model_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
" content=user_input,\n",
" stream=False,\n",
" sampling_params={\n",
" \"strategy\": {\n",
" \"type\": \"greedy\",\n",
" },\n",
" \"max_tokens\": 50,\n",
" },\n",
"response = client.chat.completions.create(\n",
" model=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
" messages=[{\"role\": \"user\", \"content\": user_input}],\n",
" max_tokens=50,\n",
" response_format={\n",
" \"type\": \"json_schema\",\n",
" \"json_schema\": Output.model_json_schema(),\n",
@ -1013,7 +1007,7 @@
"\n",
"\n",
"\n",
"<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/resources/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
"<img src=\"https://github.com/meta-llama/llama-stack/blob/main/docs/static/img/agentic-system.png?raw=true\" alt=\"drawing\" width=\"800\"/>\n",
"\n",
"\n",
"Agents are characterized by having access to\n",

View file

@ -706,20 +706,15 @@
" provider_id=\"nvidia\",\n",
")\n",
"\n",
"response = client.inference.completion(\n",
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
"response = client.completions.create(\n",
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
" stream=False,\n",
" model_id=CUSTOMIZED_MODEL_DIR,\n",
" sampling_params={\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 0.7,\n",
" \"top_p\": 0.9\n",
" },\n",
" \"max_tokens\": 20,\n",
" },\n",
" model=CUSTOMIZED_MODEL_DIR,\n",
" temperature=0.7,\n",
" top_p=0.9,\n",
" max_tokens=20,\n",
")\n",
"print(f\"Inference response: {response.content}\")"
"print(f\"Inference response: {response.choices[0].text}\")"
]
},
{
@ -1233,20 +1228,15 @@
" provider_id=\"nvidia\",\n",
")\n",
"\n",
"response = client.inference.completion(\n",
" content=\"Complete the sentence using one word: Roses are red, violets are \",\n",
"response = client.completions.create(\n",
" prompt=\"Complete the sentence using one word: Roses are red, violets are \",\n",
" stream=False,\n",
" model_id=customized_chat_model_dir,\n",
" sampling_params={\n",
" \"strategy\": {\n",
" \"type\": \"top_p\",\n",
" \"temperature\": 0.7,\n",
" \"top_p\": 0.9\n",
" },\n",
" \"max_tokens\": 20,\n",
" },\n",
" model=customized_chat_model_dir,\n",
" temperature=0.7,\n",
" top_p=0.9,\n",
" max_tokens=20,\n",
")\n",
"print(f\"Inference response: {response.content}\")"
"print(f\"Inference response: {response.choices[0].text}\")"
]
},
{

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
import hashlib
import inspect
import ipaddress
import types
import typing
@ -12,6 +13,7 @@ from dataclasses import make_dataclass
from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union
from fastapi import UploadFile
from pydantic import BaseModel
from llama_stack.apis.datatypes import Error
from llama_stack.strong_typing.core import JsonType
@ -632,14 +634,22 @@ class Generator:
base_type = get_args(param_type)[0]
else:
base_type = param_type
# Check if the type is optional
is_optional = is_type_optional(base_type)
if is_optional:
base_type = unwrap_optional_type(base_type)
if base_type is UploadFile:
# File upload
properties[name] = {"type": "string", "format": "binary"}
else:
# Form field
# All other types - generate schema reference
# This includes enums, BaseModels, and simple types
properties[name] = self.schema_builder.classdef_to_ref(base_type)
required_fields.append(name)
if not is_optional:
required_fields.append(name)
multipart_schema = {
"type": "object",

View file

Before

Width:  |  Height:  |  Size: 128 KiB

After

Width:  |  Height:  |  Size: 128 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 220 KiB

After

Width:  |  Height:  |  Size: 220 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 71 KiB

After

Width:  |  Height:  |  Size: 71 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 170 KiB

After

Width:  |  Height:  |  Size: 170 KiB

Before After
Before After

File diff suppressed because it is too large Load diff

View file

@ -286,7 +286,7 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
/v1/responses:
get:
responses:
'200':
@ -558,7 +558,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{response_id}:
/v1/responses/{response_id}:
get:
responses:
'200':
@ -720,41 +720,6 @@ paths:
required: true
schema:
type: string
/v1/inference/embeddings:
post:
responses:
'200':
description: >-
An array of embeddings, one for each content. Each embedding is a list
of floats. The dimensionality of the embedding is model-specific; you
can check model metadata using /models/{model_id}.
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate embeddings for content pieces using the specified model.
description: >-
Generate embeddings for content pieces using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/EmbeddingsRequest'
required: true
/v1alpha/eval/benchmarks/{benchmark_id}/evaluations:
post:
responses:
@ -1033,7 +998,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/chat/completions/{completion_id}:
/v1/chat/completions/{completion_id}:
get:
responses:
'200':
@ -2259,7 +2224,7 @@ paths:
schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true
/v1/openai/v1/chat/completions:
/v1/chat/completions:
get:
responses:
'200':
@ -2452,7 +2417,7 @@ paths:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
/v1/openai/v1/responses/{response_id}/input_items:
/v1/responses/{response_id}/input_items:
get:
responses:
'200':
@ -2906,7 +2871,7 @@ paths:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
/v1/openai/v1/vector_stores/{vector_store_id}/files:
/v1/vector_stores/{vector_store_id}/files:
get:
responses:
'200':
@ -3015,7 +2980,7 @@ paths:
schema:
$ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
required: true
/v1/openai/v1/completions:
/v1/completions:
post:
responses:
'200':
@ -3049,7 +3014,7 @@ paths:
schema:
$ref: '#/components/schemas/OpenaiCompletionRequest'
required: true
/v1/openai/v1/vector_stores:
/v1/vector_stores:
get:
responses:
'200':
@ -3136,7 +3101,7 @@ paths:
schema:
$ref: '#/components/schemas/OpenaiCreateVectorStoreRequest'
required: true
/v1/openai/v1/files/{file_id}:
/v1/files/{file_id}:
get:
responses:
'200':
@ -3201,7 +3166,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/vector_stores/{vector_store_id}:
/v1/vector_stores/{vector_store_id}:
get:
responses:
'200':
@ -3298,7 +3263,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}:
/v1/vector_stores/{vector_store_id}/files/{file_id}:
get:
responses:
'200':
@ -3416,7 +3381,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/embeddings:
/v1/embeddings:
post:
responses:
'200':
@ -3451,7 +3416,7 @@ paths:
schema:
$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
required: true
/v1/openai/v1/files:
/v1/files:
get:
responses:
'200':
@ -3544,8 +3509,6 @@ paths:
- purpose: The intended purpose of the uploaded file.
- expires_after: Optional form values describing expiration for the file.
Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}.
Seconds must be between 3600 and 2592000 (1 hour to 30 days).
parameters: []
requestBody:
content:
@ -3558,45 +3521,13 @@ paths:
format: binary
purpose:
$ref: '#/components/schemas/OpenAIFilePurpose'
expires_after_anchor:
oneOf:
- type: string
- type: 'null'
expires_after_seconds:
oneOf:
- type: integer
- type: 'null'
expires_after:
$ref: '#/components/schemas/ExpiresAfter'
required:
- file
- purpose
- expires_after_anchor
- expires_after_seconds
required: true
/v1/openai/v1/models:
get:
responses:
'200':
description: A OpenAIListModelsResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIListModelsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Models
summary: List models using the OpenAI API.
description: List models using the OpenAI API.
parameters: []
/v1/openai/v1/files/{file_id}/content:
/v1/files/{file_id}/content:
get:
responses:
'200':
@ -3630,7 +3561,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
/v1/vector_stores/{vector_store_id}/files/{file_id}/content:
get:
responses:
'200':
@ -3670,7 +3601,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/vector_stores/{vector_store_id}/search:
/v1/vector_stores/{vector_store_id}/search:
post:
responses:
'200':
@ -3930,7 +3861,7 @@ paths:
schema:
$ref: '#/components/schemas/QueryTracesRequest'
required: true
/v1/inference/rerank:
/v1alpha/inference/rerank:
post:
responses:
'200':
@ -4098,7 +4029,7 @@ paths:
schema:
$ref: '#/components/schemas/RunEvalRequest'
required: true
/v1/openai/v1/moderations:
/v1/moderations:
post:
responses:
'200':
@ -4656,7 +4587,16 @@ components:
type: object
properties:
strategy:
$ref: '#/components/schemas/SamplingStrategy'
oneOf:
- $ref: '#/components/schemas/GreedySamplingStrategy'
- $ref: '#/components/schemas/TopPSamplingStrategy'
- $ref: '#/components/schemas/TopKSamplingStrategy'
discriminator:
propertyName: type
mapping:
greedy: '#/components/schemas/GreedySamplingStrategy'
top_p: '#/components/schemas/TopPSamplingStrategy'
top_k: '#/components/schemas/TopKSamplingStrategy'
description: The sampling strategy.
max_tokens:
type: integer
@ -4684,17 +4624,6 @@ components:
- strategy
title: SamplingParams
description: Sampling parameters.
SamplingStrategy:
oneOf:
- $ref: '#/components/schemas/GreedySamplingStrategy'
- $ref: '#/components/schemas/TopPSamplingStrategy'
- $ref: '#/components/schemas/TopKSamplingStrategy'
discriminator:
propertyName: type
mapping:
greedy: '#/components/schemas/GreedySamplingStrategy'
top_p: '#/components/schemas/TopPSamplingStrategy'
top_k: '#/components/schemas/TopKSamplingStrategy'
SystemMessage:
type: object
properties:
@ -5141,7 +5070,16 @@ components:
- progress
description: Type of the event
delta:
$ref: '#/components/schemas/ContentDelta'
oneOf:
- $ref: '#/components/schemas/TextDelta'
- $ref: '#/components/schemas/ImageDelta'
- $ref: '#/components/schemas/ToolCallDelta'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/TextDelta'
image: '#/components/schemas/ImageDelta'
tool_call: '#/components/schemas/ToolCallDelta'
description: >-
Content generated since last event. This can be one or more tokens, or
a tool call.
@ -5184,17 +5122,6 @@ components:
title: ChatCompletionResponseStreamChunk
description: >-
A chunk of a streamed chat completion response.
ContentDelta:
oneOf:
- $ref: '#/components/schemas/TextDelta'
- $ref: '#/components/schemas/ImageDelta'
- $ref: '#/components/schemas/ToolCallDelta'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/TextDelta'
image: '#/components/schemas/ImageDelta'
tool_call: '#/components/schemas/ToolCallDelta'
ImageDelta:
type: object
properties:
@ -5876,7 +5803,22 @@ components:
type: object
properties:
payload:
$ref: '#/components/schemas/AgentTurnResponseEventPayload'
oneOf:
- $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
discriminator:
propertyName: event_type
mapping:
step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
description: >-
Event-specific payload containing event data
additionalProperties: false
@ -5885,23 +5827,6 @@ components:
title: AgentTurnResponseEvent
description: >-
An event in an agent turn response stream.
AgentTurnResponseEventPayload:
oneOf:
- $ref: '#/components/schemas/AgentTurnResponseStepStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseStepProgressPayload'
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
discriminator:
propertyName: event_type
mapping:
step_start: '#/components/schemas/AgentTurnResponseStepStartPayload'
step_progress: '#/components/schemas/AgentTurnResponseStepProgressPayload'
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
AgentTurnResponseStepCompletePayload:
type: object
properties:
@ -5980,7 +5905,16 @@ components:
description: >-
Unique identifier for the step within a turn
delta:
$ref: '#/components/schemas/ContentDelta'
oneOf:
- $ref: '#/components/schemas/TextDelta'
- $ref: '#/components/schemas/ImageDelta'
- $ref: '#/components/schemas/ToolCallDelta'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/TextDelta'
image: '#/components/schemas/ImageDelta'
tool_call: '#/components/schemas/ToolCallDelta'
description: >-
Incremental content changes during step execution
additionalProperties: false
@ -6968,10 +6902,6 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
additionalProperties: false
required:
- created_at
@ -7104,15 +7034,6 @@ components:
title: OpenAIResponseOutputMessageMCPListTools
description: >-
MCP list tools output message containing available tools from an MCP server.
OpenAIResponseContentPart:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
discriminator:
propertyName: type
mapping:
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
OpenAIResponseContentPartOutputText:
type: object
properties:
@ -7220,7 +7141,14 @@ components:
description: >-
Unique identifier of the output item containing this content part
part:
$ref: '#/components/schemas/OpenAIResponseContentPart'
oneOf:
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
discriminator:
propertyName: type
mapping:
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
description: The content part that was added
sequence_number:
type: integer
@ -7255,7 +7183,14 @@ components:
description: >-
Unique identifier of the output item containing this content part
part:
$ref: '#/components/schemas/OpenAIResponseContentPart'
oneOf:
- $ref: '#/components/schemas/OpenAIResponseContentPartOutputText'
- $ref: '#/components/schemas/OpenAIResponseContentPartRefusal'
discriminator:
propertyName: type
mapping:
output_text: '#/components/schemas/OpenAIResponseContentPartOutputText'
refusal: '#/components/schemas/OpenAIResponseContentPartRefusal'
description: The completed content part
sequence_number:
type: integer
@ -7541,7 +7476,22 @@ components:
description: >-
Unique identifier of the response containing this output
item:
$ref: '#/components/schemas/OpenAIResponseOutput'
oneOf:
- $ref: '#/components/schemas/OpenAIResponseMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
description: >-
The output item that was added (message, tool call, etc.)
output_index:
@ -7577,7 +7527,22 @@ components:
description: >-
Unique identifier of the response containing this output
item:
$ref: '#/components/schemas/OpenAIResponseOutput'
oneOf:
- $ref: '#/components/schemas/OpenAIResponseMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
file_search_call: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
description: >-
The completed output item (message, tool call, etc.)
output_index:
@ -7846,72 +7811,6 @@ components:
title: OpenAIDeleteResponseObject
description: >-
Response object confirming deletion of an OpenAI response.
EmbeddingsRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be an embedding model
registered with Llama Stack and available via the /models endpoint.
contents:
oneOf:
- type: array
items:
type: string
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
description: >-
List of contents to generate embeddings for. Each content can be a string
or an InterleavedContentItem (and hence can be multimodal). The behavior
depends on the model and provider. Some models may only support text.
text_truncation:
type: string
enum:
- none
- start
- end
description: >-
(Optional) Config for how to truncate text for embedding when text is
longer than the model's max sequence length.
output_dimension:
type: integer
description: >-
(Optional) Output dimensionality for the embeddings. Only supported by
Matryoshka models.
task_type:
type: string
enum:
- query
- document
description: >-
(Optional) How is the embedding being used? This is only supported by
asymmetric embedding models.
additionalProperties: false
required:
- model_id
- contents
title: EmbeddingsRequest
EmbeddingsResponse:
type: object
properties:
embeddings:
type: array
items:
type: array
items:
type: number
description: >-
List of embedding vectors, one per input content. Each embedding is a
list of floats. The dimensionality of the embedding is model-specific;
you can check model metadata using /models/{model_id}
additionalProperties: false
required:
- embeddings
title: EmbeddingsResponse
description: >-
Response containing generated embeddings.
AgentCandidate:
type: object
properties:
@ -7966,7 +7865,14 @@ components:
type: object
properties:
eval_candidate:
$ref: '#/components/schemas/EvalCandidate'
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
- $ref: '#/components/schemas/AgentCandidate'
discriminator:
propertyName: type
mapping:
model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate'
description: The candidate to evaluate.
scoring_params:
type: object
@ -7987,15 +7893,6 @@ components:
title: BenchmarkConfig
description: >-
A benchmark configuration for evaluation.
EvalCandidate:
oneOf:
- $ref: '#/components/schemas/ModelCandidate'
- $ref: '#/components/schemas/AgentCandidate'
discriminator:
propertyName: type
mapping:
model: '#/components/schemas/ModelCandidate'
agent: '#/components/schemas/AgentCandidate'
LLMAsJudgeScoringFnParams:
type: object
properties:
@ -8459,7 +8356,20 @@ components:
type: object
properties:
message:
$ref: '#/components/schemas/OpenAIMessageParam'
oneOf:
- $ref: '#/components/schemas/OpenAIUserMessageParam'
- $ref: '#/components/schemas/OpenAISystemMessageParam'
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
- $ref: '#/components/schemas/OpenAIToolMessageParam'
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
discriminator:
propertyName: role
mapping:
user: '#/components/schemas/OpenAIUserMessageParam'
system: '#/components/schemas/OpenAISystemMessageParam'
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
tool: '#/components/schemas/OpenAIToolMessageParam'
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
description: The message from the model
finish_reason:
type: string
@ -8752,15 +8662,6 @@ components:
- model
- input_messages
title: OpenAICompletionWithInputMessages
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
Dataset:
type: object
properties:
@ -8795,7 +8696,14 @@ components:
description: >-
Purpose of the dataset indicating its intended use
source:
$ref: '#/components/schemas/DataSource'
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
description: >-
Data source configuration for the dataset
metadata:
@ -9041,31 +8949,6 @@ components:
- type
title: ObjectType
description: Parameter type for object values.
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
- $ref: '#/components/schemas/AgentTurnInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
agent_turn_input: '#/components/schemas/AgentTurnInputType'
ScoringFn:
type: object
properties:
@ -9104,7 +8987,30 @@ components:
- type: array
- type: object
return_type:
$ref: '#/components/schemas/ParamType'
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
- $ref: '#/components/schemas/AgentTurnInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
agent_turn_input: '#/components/schemas/AgentTurnInputType'
params:
$ref: '#/components/schemas/ScoringFnParams'
additionalProperties: false
@ -10234,10 +10140,6 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
user:
type: string
description: >-
(Optional) User identifier associated with the request
input:
type: array
items:
@ -10560,7 +10462,14 @@ components:
description: >-
Event type identifier set to STRUCTURED_LOG
payload:
$ref: '#/components/schemas/StructuredLogPayload'
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
description: >-
The structured payload data for the log event
additionalProperties: false
@ -10573,15 +10482,6 @@ components:
title: StructuredLogEvent
description: >-
A structured log event containing typed payload data.
StructuredLogPayload:
oneOf:
- $ref: '#/components/schemas/SpanStartPayload'
- $ref: '#/components/schemas/SpanEndPayload'
discriminator:
propertyName: type
mapping:
span_start: '#/components/schemas/SpanStartPayload'
span_end: '#/components/schemas/SpanEndPayload'
StructuredLogType:
type: string
enum:
@ -10790,7 +10690,14 @@ components:
description: >-
Key-value attributes associated with the file
chunking_strategy:
$ref: '#/components/schemas/VectorStoreChunkingStrategy'
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
discriminator:
propertyName: type
mapping:
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
description: >-
Strategy used for splitting the file into chunks
created_at:
@ -11805,38 +11712,6 @@ components:
title: VectorStoreListFilesResponse
description: >-
Response from listing files in a vector store.
OpenAIModel:
type: object
properties:
id:
type: string
object:
type: string
const: model
default: model
created:
type: integer
owned_by:
type: string
additionalProperties: false
required:
- id
- object
- created
- owned_by
title: OpenAIModel
description: A model from OpenAI.
OpenAIListModelsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIModel'
additionalProperties: false
required:
- data
title: OpenAIListModelsResponse
VectorStoreListResponse:
type: object
properties:
@ -12102,6 +11977,25 @@ components:
required:
- attributes
title: OpenaiUpdateVectorStoreFileRequest
ExpiresAfter:
type: object
properties:
anchor:
type: string
const: created_at
seconds:
type: integer
additionalProperties: false
required:
- anchor
- seconds
title: ExpiresAfter
description: >-
Control expiration of uploaded files.
Params:
- anchor, must be "created_at"
- seconds, must be int between 3600 and 2592000 (1 hour to 30 days)
DPOAlignmentConfig:
type: object
properties:
@ -12387,7 +12281,14 @@ components:
type: object
properties:
query_generator_config:
$ref: '#/components/schemas/RAGQueryGeneratorConfig'
oneOf:
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
discriminator:
propertyName: type
mapping:
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
description: Configuration for the query generator.
max_tokens_in_context:
type: integer
@ -12430,15 +12331,6 @@ components:
title: RAGQueryConfig
description: >-
Configuration for the RAG query generation.
RAGQueryGeneratorConfig:
oneOf:
- $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
- $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig'
discriminator:
propertyName: type
mapping:
default: '#/components/schemas/DefaultRAGQueryGeneratorConfig'
llm: '#/components/schemas/LLMRAGQueryGeneratorConfig'
RAGSearchMode:
type: string
enum:
@ -12874,6 +12766,15 @@ components:
- dataset_id
- scoring_functions
title: RegisterBenchmarkRequest
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
- $ref: '#/components/schemas/RowsDataSource'
discriminator:
propertyName: type
mapping:
uri: '#/components/schemas/URIDataSource'
rows: '#/components/schemas/RowsDataSource'
RegisterDatasetRequest:
type: object
properties:
@ -12958,6 +12859,31 @@ components:
required:
- model_id
title: RegisterModelRequest
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
- $ref: '#/components/schemas/AgentTurnInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
agent_turn_input: '#/components/schemas/AgentTurnInputType'
RegisterScoringFunctionRequest:
type: object
properties: