Merge branch 'main' into nvidia-e2e-notebook

This commit is contained in:
Jash Gulabrai 2025-05-28 17:48:15 -04:00
commit f5cb965f0f
226 changed files with 16519 additions and 8666 deletions

View file

@ -518,6 +518,74 @@
}
},
"/v1/openai/v1/responses": {
"get": {
"responses": {
"200": {
"description": "A ListOpenAIResponseObject.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListOpenAIResponseObject"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "List all OpenAI responses.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "The ID of the last response to return.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "The number of responses to return.",
"required": false,
"schema": {
"type": "integer"
}
},
{
"name": "model",
"in": "query",
"description": "The model to filter responses by.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "order",
"in": "query",
"description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').",
"required": false,
"schema": {
"$ref": "#/components/schemas/Order"
}
}
]
},
"post": {
"responses": {
"200": {
@ -1395,7 +1463,7 @@
]
}
},
"/v1/openai/v1/responses/{id}": {
"/v1/openai/v1/responses/{response_id}": {
"get": {
"responses": {
"200": {
@ -1427,7 +1495,7 @@
"description": "Retrieve an OpenAI response by its ID.",
"parameters": [
{
"name": "id",
"name": "response_id",
"in": "path",
"description": "The ID of the OpenAI response to retrieve.",
"required": true,
@ -2926,6 +2994,97 @@
}
}
},
"/v1/openai/v1/responses/{response_id}/input_items": {
"get": {
"responses": {
"200": {
"description": "A ListOpenAIResponseInputItem.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListOpenAIResponseInputItem"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Agents"
],
"description": "List input items for a given OpenAI response.",
"parameters": [
{
"name": "response_id",
"in": "path",
"description": "The ID of the response to retrieve input items for.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "after",
"in": "query",
"description": "An item ID to list items after, used for pagination.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "before",
"in": "query",
"description": "An item ID to list items before, used for pagination.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "include",
"in": "query",
"description": "Additional fields to include in the response.",
"required": false,
"schema": {
"type": "array",
"items": {
"type": "string"
}
}
},
{
"name": "limit",
"in": "query",
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
"required": false,
"schema": {
"type": "integer"
}
},
{
"name": "order",
"in": "query",
"description": "The order to return the input items in. Default is desc.",
"required": false,
"schema": {
"$ref": "#/components/schemas/Order"
}
}
]
}
},
"/v1/providers": {
"get": {
"responses": {
@ -6742,6 +6901,9 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseInputToolFunction"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputToolMCP"
}
],
"discriminator": {
@ -6749,7 +6911,8 @@
"mapping": {
"web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch",
"file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch",
"function": "#/components/schemas/OpenAIResponseInputToolFunction"
"function": "#/components/schemas/OpenAIResponseInputToolFunction",
"mcp": "#/components/schemas/OpenAIResponseInputToolMCP"
}
}
},
@ -6839,6 +7002,110 @@
],
"title": "OpenAIResponseInputToolFunction"
},
"OpenAIResponseInputToolMCP": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "mcp",
"default": "mcp"
},
"server_label": {
"type": "string"
},
"server_url": {
"type": "string"
},
"headers": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"require_approval": {
"oneOf": [
{
"type": "string",
"const": "always"
},
{
"type": "string",
"const": "never"
},
{
"type": "object",
"properties": {
"always": {
"type": "array",
"items": {
"type": "string"
}
},
"never": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"title": "ApprovalFilter"
}
],
"default": "never"
},
"allowed_tools": {
"oneOf": [
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "object",
"properties": {
"tool_names": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false,
"title": "AllowedToolsFilter"
}
]
}
},
"additionalProperties": false,
"required": [
"type",
"server_label",
"server_url",
"require_approval"
],
"title": "OpenAIResponseInputToolMCP"
},
"OpenAIResponseInputToolWebSearch": {
"type": "object",
"properties": {
@ -6951,15 +7218,15 @@
"OpenAIResponseOutputMessageFunctionToolCall": {
"type": "object",
"properties": {
"arguments": {
"type": "string"
},
"call_id": {
"type": "string"
},
"name": {
"type": "string"
},
"arguments": {
"type": "string"
},
"type": {
"type": "string",
"const": "function_call",
@ -6974,12 +7241,10 @@
},
"additionalProperties": false,
"required": [
"arguments",
"call_id",
"name",
"type",
"id",
"status"
"arguments",
"type"
],
"title": "OpenAIResponseOutputMessageFunctionToolCall"
},
@ -7027,6 +7292,9 @@
"type": "string",
"description": "The underlying LLM used for completions."
},
"instructions": {
"type": "string"
},
"previous_response_id": {
"type": "string",
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
@ -7142,6 +7410,12 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
}
],
"discriminator": {
@ -7149,15 +7423,126 @@
"mapping": {
"message": "#/components/schemas/OpenAIResponseMessage",
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
"mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
}
}
},
"OpenAIResponseOutputMessageMCPCall": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"type": {
"type": "string",
"const": "mcp_call",
"default": "mcp_call"
},
"arguments": {
"type": "string"
},
"name": {
"type": "string"
},
"server_label": {
"type": "string"
},
"error": {
"type": "string"
},
"output": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"id",
"type",
"arguments",
"name",
"server_label"
],
"title": "OpenAIResponseOutputMessageMCPCall"
},
"OpenAIResponseOutputMessageMCPListTools": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"type": {
"type": "string",
"const": "mcp_list_tools",
"default": "mcp_list_tools"
},
"server_label": {
"type": "string"
},
"tools": {
"type": "array",
"items": {
"type": "object",
"properties": {
"input_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"name": {
"type": "string"
},
"description": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"input_schema",
"name"
],
"title": "MCPListToolsTool"
}
}
},
"additionalProperties": false,
"required": [
"id",
"type",
"server_label",
"tools"
],
"title": "OpenAIResponseOutputMessageMCPListTools"
},
"OpenAIResponseObjectStream": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
},
{
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
@ -7166,6 +7551,7 @@
"propertyName": "type",
"mapping": {
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
"response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
}
}
@ -7208,6 +7594,41 @@
],
"title": "OpenAIResponseObjectStreamResponseCreated"
},
"OpenAIResponseObjectStreamResponseOutputTextDelta": {
"type": "object",
"properties": {
"content_index": {
"type": "integer"
},
"delta": {
"type": "string"
},
"item_id": {
"type": "string"
},
"output_index": {
"type": "integer"
},
"sequence_number": {
"type": "integer"
},
"type": {
"type": "string",
"const": "response.output_text.delta",
"default": "response.output_text.delta"
}
},
"additionalProperties": false,
"required": [
"content_index",
"delta",
"item_id",
"output_index",
"sequence_number",
"type"
],
"title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {
@ -9173,9 +9594,6 @@
"toolgroup_id": {
"type": "string"
},
"tool_host": {
"$ref": "#/components/schemas/ToolHost"
},
"description": {
"type": "string"
},
@ -9217,21 +9635,11 @@
"provider_id",
"type",
"toolgroup_id",
"tool_host",
"description",
"parameters"
],
"title": "Tool"
},
"ToolHost": {
"type": "string",
"enum": [
"distribution",
"client",
"model_context_protocol"
],
"title": "ToolHost"
},
"ToolGroup": {
"type": "object",
"properties": {
@ -10068,6 +10476,130 @@
],
"title": "ListModelsResponse"
},
"ListOpenAIResponseInputItem": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInput"
}
},
"object": {
"type": "string",
"const": "list",
"default": "list"
}
},
"additionalProperties": false,
"required": [
"data",
"object"
],
"title": "ListOpenAIResponseInputItem"
},
"ListOpenAIResponseObject": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseObjectWithInput"
}
},
"has_more": {
"type": "boolean"
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "list",
"default": "list"
}
},
"additionalProperties": false,
"required": [
"data",
"has_more",
"first_id",
"last_id",
"object"
],
"title": "ListOpenAIResponseObject"
},
"OpenAIResponseObjectWithInput": {
"type": "object",
"properties": {
"created_at": {
"type": "integer"
},
"error": {
"$ref": "#/components/schemas/OpenAIResponseError"
},
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"object": {
"type": "string",
"const": "response",
"default": "response"
},
"output": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseOutput"
}
},
"parallel_tool_calls": {
"type": "boolean",
"default": false
},
"previous_response_id": {
"type": "string"
},
"status": {
"type": "string"
},
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"truncation": {
"type": "string"
},
"user": {
"type": "string"
},
"input": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIResponseInput"
}
}
},
"additionalProperties": false,
"required": [
"created_at",
"id",
"model",
"object",
"output",
"parallel_tool_calls",
"status",
"input"
],
"title": "OpenAIResponseObjectWithInput"
},
"ListProvidersResponse": {
"type": "object",
"properties": {
@ -11605,6 +12137,10 @@
"type": "string",
"default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
"description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
},
"mode": {
"type": "string",
"description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
}
},
"additionalProperties": false,

View file

@ -349,6 +349,53 @@ paths:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/openai/v1/responses:
get:
responses:
'200':
description: A ListOpenAIResponseObject.
content:
application/json:
schema:
$ref: '#/components/schemas/ListOpenAIResponseObject'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: List all OpenAI responses.
parameters:
- name: after
in: query
description: The ID of the last response to return.
required: false
schema:
type: string
- name: limit
in: query
description: The number of responses to return.
required: false
schema:
type: integer
- name: model
in: query
description: The model to filter responses by.
required: false
schema:
type: string
- name: order
in: query
description: >-
The order to sort responses by when sorted by created_at ('asc' or 'desc').
required: false
schema:
$ref: '#/components/schemas/Order'
post:
responses:
'200':
@ -963,7 +1010,7 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/responses/{id}:
/v1/openai/v1/responses/{response_id}:
get:
responses:
'200':
@ -986,7 +1033,7 @@ paths:
- Agents
description: Retrieve an OpenAI response by its ID.
parameters:
- name: id
- name: response_id
in: path
description: >-
The ID of the OpenAI response to retrieve.
@ -2038,6 +2085,75 @@ paths:
schema:
$ref: '#/components/schemas/RegisterModelRequest'
required: true
/v1/openai/v1/responses/{response_id}/input_items:
get:
responses:
'200':
description: A ListOpenAIResponseInputItem.
content:
application/json:
schema:
$ref: '#/components/schemas/ListOpenAIResponseInputItem'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Agents
description: >-
List input items for a given OpenAI response.
parameters:
- name: response_id
in: path
description: >-
The ID of the response to retrieve input items for.
required: true
schema:
type: string
- name: after
in: query
description: >-
An item ID to list items after, used for pagination.
required: false
schema:
type: string
- name: before
in: query
description: >-
An item ID to list items before, used for pagination.
required: false
schema:
type: string
- name: include
in: query
description: >-
Additional fields to include in the response.
required: false
schema:
type: array
items:
type: string
- name: limit
in: query
description: >-
A limit on the number of objects to be returned. Limit can range between
1 and 100, and the default is 20.
required: false
schema:
type: integer
- name: order
in: query
description: >-
The order to return the input items in. Default is desc.
required: false
schema:
$ref: '#/components/schemas/Order'
/v1/providers:
get:
responses:
@ -4762,12 +4878,14 @@ components:
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
- $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
- $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
- $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
discriminator:
propertyName: type
mapping:
web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
function: '#/components/schemas/OpenAIResponseInputToolFunction'
mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
OpenAIResponseInputToolFileSearch:
type: object
properties:
@ -4822,6 +4940,66 @@ components:
- type
- name
title: OpenAIResponseInputToolFunction
OpenAIResponseInputToolMCP:
type: object
properties:
type:
type: string
const: mcp
default: mcp
server_label:
type: string
server_url:
type: string
headers:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
require_approval:
oneOf:
- type: string
const: always
- type: string
const: never
- type: object
properties:
always:
type: array
items:
type: string
never:
type: array
items:
type: string
additionalProperties: false
title: ApprovalFilter
default: never
allowed_tools:
oneOf:
- type: array
items:
type: string
- type: object
properties:
tool_names:
type: array
items:
type: string
additionalProperties: false
title: AllowedToolsFilter
additionalProperties: false
required:
- type
- server_label
- server_url
- require_approval
title: OpenAIResponseInputToolMCP
OpenAIResponseInputToolWebSearch:
type: object
properties:
@ -4897,12 +5075,12 @@ components:
"OpenAIResponseOutputMessageFunctionToolCall":
type: object
properties:
arguments:
type: string
call_id:
type: string
name:
type: string
arguments:
type: string
type:
type: string
const: function_call
@ -4913,12 +5091,10 @@ components:
type: string
additionalProperties: false
required:
- arguments
- call_id
- name
- arguments
- type
- id
- status
title: >-
OpenAIResponseOutputMessageFunctionToolCall
"OpenAIResponseOutputMessageWebSearchToolCall":
@ -4952,6 +5128,8 @@ components:
model:
type: string
description: The underlying LLM used for completions.
instructions:
type: string
previous_response_id:
type: string
description: >-
@ -5034,20 +5212,95 @@ components:
- $ref: '#/components/schemas/OpenAIResponseMessage'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
discriminator:
propertyName: type
mapping:
message: '#/components/schemas/OpenAIResponseMessage'
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
OpenAIResponseOutputMessageMCPCall:
type: object
properties:
id:
type: string
type:
type: string
const: mcp_call
default: mcp_call
arguments:
type: string
name:
type: string
server_label:
type: string
error:
type: string
output:
type: string
additionalProperties: false
required:
- id
- type
- arguments
- name
- server_label
title: OpenAIResponseOutputMessageMCPCall
OpenAIResponseOutputMessageMCPListTools:
type: object
properties:
id:
type: string
type:
type: string
const: mcp_list_tools
default: mcp_list_tools
server_label:
type: string
tools:
type: array
items:
type: object
properties:
input_schema:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
name:
type: string
description:
type: string
additionalProperties: false
required:
- input_schema
- name
title: MCPListToolsTool
additionalProperties: false
required:
- id
- type
- server_label
- tools
title: OpenAIResponseOutputMessageMCPListTools
OpenAIResponseObjectStream:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
discriminator:
propertyName: type
mapping:
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
"OpenAIResponseObjectStreamResponseCompleted":
type: object
@ -5079,6 +5332,33 @@ components:
- type
title: >-
OpenAIResponseObjectStreamResponseCreated
"OpenAIResponseObjectStreamResponseOutputTextDelta":
type: object
properties:
content_index:
type: integer
delta:
type: string
item_id:
type: string
output_index:
type: integer
sequence_number:
type: integer
type:
type: string
const: response.output_text.delta
default: response.output_text.delta
additionalProperties: false
required:
- content_index
- delta
- item_id
- output_index
- sequence_number
- type
title: >-
OpenAIResponseObjectStreamResponseOutputTextDelta
CreateUploadSessionRequest:
type: object
properties:
@ -6462,8 +6742,6 @@ components:
default: tool
toolgroup_id:
type: string
tool_host:
$ref: '#/components/schemas/ToolHost'
description:
type: string
parameters:
@ -6486,17 +6764,9 @@ components:
- provider_id
- type
- toolgroup_id
- tool_host
- description
- parameters
title: Tool
ToolHost:
type: string
enum:
- distribution
- client
- model_context_protocol
title: ToolHost
ToolGroup:
type: object
properties:
@ -7042,6 +7312,96 @@ components:
required:
- data
title: ListModelsResponse
ListOpenAIResponseInputItem:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInput'
object:
type: string
const: list
default: list
additionalProperties: false
required:
- data
- object
title: ListOpenAIResponseInputItem
ListOpenAIResponseObject:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseObjectWithInput'
has_more:
type: boolean
first_id:
type: string
last_id:
type: string
object:
type: string
const: list
default: list
additionalProperties: false
required:
- data
- has_more
- first_id
- last_id
- object
title: ListOpenAIResponseObject
OpenAIResponseObjectWithInput:
type: object
properties:
created_at:
type: integer
error:
$ref: '#/components/schemas/OpenAIResponseError'
id:
type: string
model:
type: string
object:
type: string
const: response
default: response
output:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseOutput'
parallel_tool_calls:
type: boolean
default: false
previous_response_id:
type: string
status:
type: string
temperature:
type: number
top_p:
type: number
truncation:
type: string
user:
type: string
input:
type: array
items:
$ref: '#/components/schemas/OpenAIResponseInput'
additionalProperties: false
required:
- created_at
- id
- model
- object
- output
- parallel_tool_calls
- status
- input
title: OpenAIResponseObjectWithInput
ListProvidersResponse:
type: object
properties:
@ -8084,6 +8444,10 @@ components:
placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
{chunk.content}\nMetadata: {metadata}\n"
mode:
type: string
description: >-
Search mode for retrieval—either "vector" or "keyword". Default "vector".
additionalProperties: false
required:
- query_generator_config

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,10 @@
Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).
## Render locally
From the llama-stack root directory, run the following command to render the docs locally:
```bash
pip install -r requirements.txt
cd docs
python -m sphinx_autobuild source _build
uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all
```
You can open up the docs in your browser at http://localhost:8000

View file

@ -1,16 +0,0 @@
linkify
myst-parser
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==8.1.3
sphinx-copybutton
sphinx-design
sphinx-pdj-theme
sphinx-rtd-theme>=1.0.0
sphinx-tabs
sphinx_autobuild
sphinx_rtd_dark_mode
sphinxcontrib-mermaid
sphinxcontrib-openapi
sphinxcontrib-redoc
sphinxcontrib-video
tomli

View file

@ -22,7 +22,11 @@ from docutils import nodes
# Read version from pyproject.toml
with Path(__file__).parent.parent.parent.joinpath("pyproject.toml").open("rb") as f:
pypi_url = "https://pypi.org/pypi/llama-stack/json"
version_tag = json.loads(requests.get(pypi_url).text)["info"]["version"]
headers = {
'User-Agent': 'pip/23.0.1 (python 3.11)', # Mimic pip's user agent
'Accept': 'application/json'
}
version_tag = json.loads(requests.get(pypi_url, headers=headers).text)["info"]["version"]
print(f"{version_tag=}")
# generate the full link including text and url here
@ -53,14 +57,6 @@ myst_enable_extensions = ["colon_fence"]
html_theme = "sphinx_rtd_theme"
html_use_relative_paths = True
# html_theme = "sphinx_pdj_theme"
# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()]
# html_theme = "pytorch_sphinx_theme"
# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

View file

@ -338,6 +338,48 @@ INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```
### Listing Distributions
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
```
llama stack list -h
usage: llama stack list [-h]
list the build stacks
options:
-h, --help show this help message and exit
```
Example Usage
```
llama stack list
```
### Removing a Distribution
Use the remove command to delete a distribution you've previously built.
```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]
Remove the build stack
positional arguments:
name Name of the stack to delete (default: None)
options:
-h, --help show this help message and exit
--all, -a Delete all stacks (use with caution) (default: False)
```
Example
```
llama stack rm llamastack-test
```
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.
### Troubleshooting

View file

@ -118,11 +118,6 @@ server:
port: 8321 # Port to listen on (default: 8321)
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
auth: # Optional: Authentication configuration
provider_type: "kubernetes" # Type of auth provider
config: # Provider-specific configuration
api_server_url: "https://kubernetes.default.svc"
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
```
### Authentication Configuration
@ -135,7 +130,7 @@ Authorization: Bearer <token>
The server supports multiple authentication providers:
#### Kubernetes Provider
#### OAuth 2.0/OpenID Connect Provider with Kubernetes
The Kubernetes cluster must be configured to use a service account for authentication.
@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
```
Validates tokens against the Kubernetes API server:
Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
and that the correct RoleBinding is created to allow the service account to access the necessary
resources. If that is not the case, you can create a ClusterRole and ClusterRoleBinding that allow
anonymous requests to read the cluster's JWKS endpoint (`/openid/v1/jwks`):
```yaml
# allow-anonymous-openid.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: allow-anonymous-openid
rules:
- nonResourceURLs: ["/openid/v1/jwks"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-anonymous-openid
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: allow-anonymous-openid
subjects:
- kind: User
name: system:anonymous
apiGroup: rbac.authorization.k8s.io
```
And then apply the configuration:
```bash
kubectl apply -f allow-anonymous-openid.yaml
```
Validates tokens against the Kubernetes API server through the OIDC provider:
```yaml
server:
auth:
provider_type: "kubernetes"
provider_type: "oauth2_token"
config:
api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
jwks:
uri: "https://kubernetes.default.svc"
key_recheck_period: 3600
tls_cafile: "/path/to/ca.crt"
issuer: "https://kubernetes.default.svc"
audience: "https://kubernetes.default.svc"
```
To find your cluster's audience, run:
```bash
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
```
For the issuer, you can use the OIDC provider's URL:
```bash
kubectl get --raw /.well-known/openid-configuration| jq .issuer
```
For the tls_cafile, you can use the CA certificate of the OIDC provider:
```bash
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
```
The provider extracts user information from the JWT token:
@ -208,6 +256,80 @@ And must respond with:
If no access attributes are returned, the token is used as a namespace.
### Quota Configuration
The `quota` section allows you to enable server-side request throttling for both
authenticated and anonymous clients. This is useful for preventing abuse, enforcing
fairness across tenants, and controlling infrastructure costs without requiring
client-side rate limiting or external proxies.
Quotas are disabled by default. When enabled, each client is tracked using either:
* Their authenticated `client_id` (derived from the Bearer token), or
* Their IP address (fallback for anonymous requests)
Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
within a configurable time window (currently only `day` is supported).
#### Example
```yaml
server:
quota:
kvstore:
type: sqlite
db_path: ./quotas.db
anonymous_max_requests: 100
authenticated_max_requests: 1000
period: day
```
#### Configuration Options
| Field | Description |
| ---------------------------- | -------------------------------------------------------------------------- |
| `kvstore` | Required. Backend storage config for tracking request counts. |
| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. |
| `kvstore.db_path` | File path to the SQLite database. |
| `anonymous_max_requests` | Max requests per period for unauthenticated clients. |
| `authenticated_max_requests` | Max requests per period for authenticated clients. |
| `period` | Time window for quota enforcement. Only `"day"` is supported. |
> Note: if `authenticated_max_requests` is set but no authentication provider is
configured, the server will fall back to applying `anonymous_max_requests` to all
clients.
#### Example with Authentication Enabled
```yaml
server:
port: 8321
auth:
provider_type: custom
config:
endpoint: https://auth.example.com/validate
quota:
kvstore:
type: sqlite
db_path: ./quotas.db
anonymous_max_requests: 100
authenticated_max_requests: 1000
period: day
```
If a client exceeds their limit, the server responds with:
```http
HTTP/1.1 429 Too Many Requests
Content-Type: application/json
{
"error": {
"message": "Quota exceeded"
}
}
```
## Extending to handle Safety
Configuring Safety can be a little involved so it is instructive to go through an example.

View file

@ -17,7 +17,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
|-----|-------------|
| agents | `inline::meta-reference` |
| inference | `remote::sambanova`, `inline::sentence-transformers` |
| safety | `inline::llama-guard` |
| safety | `remote::sambanova` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
@ -48,33 +48,44 @@ The following models are available by default:
### Prerequisite: API Keys
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup).
## Running Llama Stack with SambaNova
You can do this via Conda (build code) or Docker which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
### Via Docker
```bash
LLAMA_STACK_PORT=8321
llama stack build --template sambanova --image-type container
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-sambanova \
-v ~/.llama:/root/.llama \
distribution-sambanova \
--port $LLAMA_STACK_PORT \
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
```
### Via Venv
```bash
llama stack build --template sambanova --image-type venv
llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \
--port $LLAMA_STACK_PORT \
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
```
### Via Conda
```bash
llama stack build --template sambanova --image-type conda
llama stack run ./run.yaml \
llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \
--port $LLAMA_STACK_PORT \
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
```

View file

@ -66,6 +66,25 @@ To use sqlite-vec in your Llama Stack project, follow these steps:
2. Configure your Llama Stack project to use SQLite-Vec.
3. Start storing and querying vectors.
## Supported Search Modes
The sqlite-vec provider supports both vector-based and keyword-based (full-text) search modes.
When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in
`RAGQueryConfig`. For example:
```python
from llama_stack.apis.tool_runtime.rag import RAGQueryConfig
query_config = RAGQueryConfig(max_chunks=6, mode="vector")
results = client.tool_runtime.rag_tool.query(
vector_db_ids=[vector_db_id],
content="what is torchtune",
query_config=query_config,
)
```
## Installation
You can install SQLite-Vec using pip: