mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-21 03:59:42 +00:00
Merge branch 'main' into nvidia-e2e-notebook
This commit is contained in:
commit
f5cb965f0f
226 changed files with 16519 additions and 8666 deletions
584
docs/_static/llama-stack-spec.html
vendored
584
docs/_static/llama-stack-spec.html
vendored
|
@ -518,6 +518,74 @@
|
|||
}
|
||||
},
|
||||
"/v1/openai/v1/responses": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A ListOpenAIResponseObject.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListOpenAIResponseObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "List all OpenAI responses.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
"in": "query",
|
||||
"description": "The ID of the last response to return.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "The number of responses to return.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "model",
|
||||
"in": "query",
|
||||
"description": "The model to filter responses by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "order",
|
||||
"in": "query",
|
||||
"description": "The order to sort responses by when sorted by created_at ('asc' or 'desc').",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Order"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -1395,7 +1463,7 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/responses/{id}": {
|
||||
"/v1/openai/v1/responses/{response_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -1427,7 +1495,7 @@
|
|||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"name": "response_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the OpenAI response to retrieve.",
|
||||
"required": true,
|
||||
|
@ -2926,6 +2994,97 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/responses/{response_id}/input_items": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "An ListOpenAIResponseInputItem.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListOpenAIResponseInputItem"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "List input items for a given OpenAI response.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "response_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the response to retrieve input items for.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "after",
|
||||
"in": "query",
|
||||
"description": "An item ID to list items after, used for pagination.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "before",
|
||||
"in": "query",
|
||||
"description": "An item ID to list items before, used for pagination.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "include",
|
||||
"in": "query",
|
||||
"description": "Additional fields to include in the response.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "order",
|
||||
"in": "query",
|
||||
"description": "The order to return the input items in. Default is desc.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Order"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/providers": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -6742,6 +6901,9 @@
|
|||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputToolFunction"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputToolMCP"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
|
@ -6749,7 +6911,8 @@
|
|||
"mapping": {
|
||||
"web_search": "#/components/schemas/OpenAIResponseInputToolWebSearch",
|
||||
"file_search": "#/components/schemas/OpenAIResponseInputToolFileSearch",
|
||||
"function": "#/components/schemas/OpenAIResponseInputToolFunction"
|
||||
"function": "#/components/schemas/OpenAIResponseInputToolFunction",
|
||||
"mcp": "#/components/schemas/OpenAIResponseInputToolMCP"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -6839,6 +7002,110 @@
|
|||
],
|
||||
"title": "OpenAIResponseInputToolFunction"
|
||||
},
|
||||
"OpenAIResponseInputToolMCP": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "mcp",
|
||||
"default": "mcp"
|
||||
},
|
||||
"server_label": {
|
||||
"type": "string"
|
||||
},
|
||||
"server_url": {
|
||||
"type": "string"
|
||||
},
|
||||
"headers": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"require_approval": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"const": "always"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "never"
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"always": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"never": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "ApprovalFilter"
|
||||
}
|
||||
],
|
||||
"default": "never"
|
||||
},
|
||||
"allowed_tools": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tool_names": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "AllowedToolsFilter"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"server_label",
|
||||
"server_url",
|
||||
"require_approval"
|
||||
],
|
||||
"title": "OpenAIResponseInputToolMCP"
|
||||
},
|
||||
"OpenAIResponseInputToolWebSearch": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -6951,15 +7218,15 @@
|
|||
"OpenAIResponseOutputMessageFunctionToolCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
},
|
||||
"call_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "function_call",
|
||||
|
@ -6974,12 +7241,10 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"arguments",
|
||||
"call_id",
|
||||
"name",
|
||||
"type",
|
||||
"id",
|
||||
"status"
|
||||
"arguments",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessageFunctionToolCall"
|
||||
},
|
||||
|
@ -7027,6 +7292,9 @@
|
|||
"type": "string",
|
||||
"description": "The underlying LLM used for completions."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string"
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
|
||||
|
@ -7142,6 +7410,12 @@
|
|||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
|
@ -7149,15 +7423,126 @@
|
|||
"mapping": {
|
||||
"message": "#/components/schemas/OpenAIResponseMessage",
|
||||
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall",
|
||||
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
|
||||
"function_call": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall",
|
||||
"mcp_call": "#/components/schemas/OpenAIResponseOutputMessageMCPCall",
|
||||
"mcp_list_tools": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseOutputMessageMCPCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "mcp_call",
|
||||
"default": "mcp_call"
|
||||
},
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"server_label": {
|
||||
"type": "string"
|
||||
},
|
||||
"error": {
|
||||
"type": "string"
|
||||
},
|
||||
"output": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"arguments",
|
||||
"name",
|
||||
"server_label"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessageMCPCall"
|
||||
},
|
||||
"OpenAIResponseOutputMessageMCPListTools": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "mcp_list_tools",
|
||||
"default": "mcp_list_tools"
|
||||
},
|
||||
"server_label": {
|
||||
"type": "string"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_schema",
|
||||
"name"
|
||||
],
|
||||
"title": "MCPListToolsTool"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"server_label",
|
||||
"tools"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessageMCPListTools"
|
||||
},
|
||||
"OpenAIResponseObjectStream": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
|
@ -7166,6 +7551,7 @@
|
|||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
|
||||
"response.output_text.delta": "#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta",
|
||||
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
}
|
||||
|
@ -7208,6 +7594,41 @@
|
|||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
"OpenAIResponseObjectStreamResponseOutputTextDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content_index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"delta": {
|
||||
"type": "string"
|
||||
},
|
||||
"item_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"sequence_number": {
|
||||
"type": "integer"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "response.output_text.delta",
|
||||
"default": "response.output_text.delta"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content_index",
|
||||
"delta",
|
||||
"item_id",
|
||||
"output_index",
|
||||
"sequence_number",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseOutputTextDelta"
|
||||
},
|
||||
"CreateUploadSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -9173,9 +9594,6 @@
|
|||
"toolgroup_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"tool_host": {
|
||||
"$ref": "#/components/schemas/ToolHost"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
|
@ -9217,21 +9635,11 @@
|
|||
"provider_id",
|
||||
"type",
|
||||
"toolgroup_id",
|
||||
"tool_host",
|
||||
"description",
|
||||
"parameters"
|
||||
],
|
||||
"title": "Tool"
|
||||
},
|
||||
"ToolHost": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"distribution",
|
||||
"client",
|
||||
"model_context_protocol"
|
||||
],
|
||||
"title": "ToolHost"
|
||||
},
|
||||
"ToolGroup": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -10068,6 +10476,130 @@
|
|||
],
|
||||
"title": "ListModelsResponse"
|
||||
},
|
||||
"ListOpenAIResponseInputItem": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "list",
|
||||
"default": "list"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data",
|
||||
"object"
|
||||
],
|
||||
"title": "ListOpenAIResponseInputItem"
|
||||
},
|
||||
"ListOpenAIResponseObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectWithInput"
|
||||
}
|
||||
},
|
||||
"has_more": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"first_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"last_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "list",
|
||||
"default": "list"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data",
|
||||
"has_more",
|
||||
"first_id",
|
||||
"last_id",
|
||||
"object"
|
||||
],
|
||||
"title": "ListOpenAIResponseObject"
|
||||
},
|
||||
"OpenAIResponseObjectWithInput": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseError"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "response",
|
||||
"default": "response"
|
||||
},
|
||||
"output": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutput"
|
||||
}
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"type": "string"
|
||||
},
|
||||
"user": {
|
||||
"type": "string"
|
||||
},
|
||||
"input": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInput"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"created_at",
|
||||
"id",
|
||||
"model",
|
||||
"object",
|
||||
"output",
|
||||
"parallel_tool_calls",
|
||||
"status",
|
||||
"input"
|
||||
],
|
||||
"title": "OpenAIResponseObjectWithInput"
|
||||
},
|
||||
"ListProvidersResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -11605,6 +12137,10 @@
|
|||
"type": "string",
|
||||
"default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
|
||||
"description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\""
|
||||
},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
398
docs/_static/llama-stack-spec.yaml
vendored
398
docs/_static/llama-stack-spec.yaml
vendored
|
@ -349,6 +349,53 @@ paths:
|
|||
$ref: '#/components/schemas/CreateAgentTurnRequest'
|
||||
required: true
|
||||
/v1/openai/v1/responses:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A ListOpenAIResponseObject.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListOpenAIResponseObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: List all OpenAI responses.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
description: The ID of the last response to return.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: The number of responses to return.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: model
|
||||
in: query
|
||||
description: The model to filter responses by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: order
|
||||
in: query
|
||||
description: >-
|
||||
The order to sort responses by when sorted by created_at ('asc' or 'desc').
|
||||
required: false
|
||||
schema:
|
||||
$ref: '#/components/schemas/Order'
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -963,7 +1010,7 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/responses/{id}:
|
||||
/v1/openai/v1/responses/{response_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -986,7 +1033,7 @@ paths:
|
|||
- Agents
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
parameters:
|
||||
- name: id
|
||||
- name: response_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the OpenAI response to retrieve.
|
||||
|
@ -2038,6 +2085,75 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RegisterModelRequest'
|
||||
required: true
|
||||
/v1/openai/v1/responses/{response_id}/input_items:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: An ListOpenAIResponseInputItem.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListOpenAIResponseInputItem'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: >-
|
||||
List input items for a given OpenAI response.
|
||||
parameters:
|
||||
- name: response_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the response to retrieve input items for.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: after
|
||||
in: query
|
||||
description: >-
|
||||
An item ID to list items after, used for pagination.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: before
|
||||
in: query
|
||||
description: >-
|
||||
An item ID to list items before, used for pagination.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: include
|
||||
in: query
|
||||
description: >-
|
||||
Additional fields to include in the response.
|
||||
required: false
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: >-
|
||||
A limit on the number of objects to be returned. Limit can range between
|
||||
1 and 100, and the default is 20.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: order
|
||||
in: query
|
||||
description: >-
|
||||
The order to return the input items in. Default is desc.
|
||||
required: false
|
||||
schema:
|
||||
$ref: '#/components/schemas/Order'
|
||||
/v1/providers:
|
||||
get:
|
||||
responses:
|
||||
|
@ -4762,12 +4878,14 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolFileSearch'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolFunction'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputToolMCP'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
web_search: '#/components/schemas/OpenAIResponseInputToolWebSearch'
|
||||
file_search: '#/components/schemas/OpenAIResponseInputToolFileSearch'
|
||||
function: '#/components/schemas/OpenAIResponseInputToolFunction'
|
||||
mcp: '#/components/schemas/OpenAIResponseInputToolMCP'
|
||||
OpenAIResponseInputToolFileSearch:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4822,6 +4940,66 @@ components:
|
|||
- type
|
||||
- name
|
||||
title: OpenAIResponseInputToolFunction
|
||||
OpenAIResponseInputToolMCP:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: mcp
|
||||
default: mcp
|
||||
server_label:
|
||||
type: string
|
||||
server_url:
|
||||
type: string
|
||||
headers:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
require_approval:
|
||||
oneOf:
|
||||
- type: string
|
||||
const: always
|
||||
- type: string
|
||||
const: never
|
||||
- type: object
|
||||
properties:
|
||||
always:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
never:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: ApprovalFilter
|
||||
default: never
|
||||
allowed_tools:
|
||||
oneOf:
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
- type: object
|
||||
properties:
|
||||
tool_names:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: AllowedToolsFilter
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- server_label
|
||||
- server_url
|
||||
- require_approval
|
||||
title: OpenAIResponseInputToolMCP
|
||||
OpenAIResponseInputToolWebSearch:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4897,12 +5075,12 @@ components:
|
|||
"OpenAIResponseOutputMessageFunctionToolCall":
|
||||
type: object
|
||||
properties:
|
||||
arguments:
|
||||
type: string
|
||||
call_id:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
arguments:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: function_call
|
||||
|
@ -4913,12 +5091,10 @@ components:
|
|||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- arguments
|
||||
- call_id
|
||||
- name
|
||||
- arguments
|
||||
- type
|
||||
- id
|
||||
- status
|
||||
title: >-
|
||||
OpenAIResponseOutputMessageFunctionToolCall
|
||||
"OpenAIResponseOutputMessageWebSearchToolCall":
|
||||
|
@ -4952,6 +5128,8 @@ components:
|
|||
model:
|
||||
type: string
|
||||
description: The underlying LLM used for completions.
|
||||
instructions:
|
||||
type: string
|
||||
previous_response_id:
|
||||
type: string
|
||||
description: >-
|
||||
|
@ -5034,20 +5212,95 @@ components:
|
|||
- $ref: '#/components/schemas/OpenAIResponseMessage'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
message: '#/components/schemas/OpenAIResponseMessage'
|
||||
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
function_call: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
OpenAIResponseOutputMessageMCPCall:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: mcp_call
|
||||
default: mcp_call
|
||||
arguments:
|
||||
type: string
|
||||
name:
|
||||
type: string
|
||||
server_label:
|
||||
type: string
|
||||
error:
|
||||
type: string
|
||||
output:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- type
|
||||
- arguments
|
||||
- name
|
||||
- server_label
|
||||
title: OpenAIResponseOutputMessageMCPCall
|
||||
OpenAIResponseOutputMessageMCPListTools:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: mcp_list_tools
|
||||
default: mcp_list_tools
|
||||
server_label:
|
||||
type: string
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
input_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_schema
|
||||
- name
|
||||
title: MCPListToolsTool
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- type
|
||||
- server_label
|
||||
- tools
|
||||
title: OpenAIResponseOutputMessageMCPListTools
|
||||
OpenAIResponseObjectStream:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
response.output_text.delta: '#/components/schemas/OpenAIResponseObjectStreamResponseOutputTextDelta'
|
||||
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
"OpenAIResponseObjectStreamResponseCompleted":
|
||||
type: object
|
||||
|
@ -5079,6 +5332,33 @@ components:
|
|||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseCreated
|
||||
"OpenAIResponseObjectStreamResponseOutputTextDelta":
|
||||
type: object
|
||||
properties:
|
||||
content_index:
|
||||
type: integer
|
||||
delta:
|
||||
type: string
|
||||
item_id:
|
||||
type: string
|
||||
output_index:
|
||||
type: integer
|
||||
sequence_number:
|
||||
type: integer
|
||||
type:
|
||||
type: string
|
||||
const: response.output_text.delta
|
||||
default: response.output_text.delta
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content_index
|
||||
- delta
|
||||
- item_id
|
||||
- output_index
|
||||
- sequence_number
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseOutputTextDelta
|
||||
CreateUploadSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6462,8 +6742,6 @@ components:
|
|||
default: tool
|
||||
toolgroup_id:
|
||||
type: string
|
||||
tool_host:
|
||||
$ref: '#/components/schemas/ToolHost'
|
||||
description:
|
||||
type: string
|
||||
parameters:
|
||||
|
@ -6486,17 +6764,9 @@ components:
|
|||
- provider_id
|
||||
- type
|
||||
- toolgroup_id
|
||||
- tool_host
|
||||
- description
|
||||
- parameters
|
||||
title: Tool
|
||||
ToolHost:
|
||||
type: string
|
||||
enum:
|
||||
- distribution
|
||||
- client
|
||||
- model_context_protocol
|
||||
title: ToolHost
|
||||
ToolGroup:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -7042,6 +7312,96 @@ components:
|
|||
required:
|
||||
- data
|
||||
title: ListModelsResponse
|
||||
ListOpenAIResponseInputItem:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
object:
|
||||
type: string
|
||||
const: list
|
||||
default: list
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
- object
|
||||
title: ListOpenAIResponseInputItem
|
||||
ListOpenAIResponseObject:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseObjectWithInput'
|
||||
has_more:
|
||||
type: boolean
|
||||
first_id:
|
||||
type: string
|
||||
last_id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: list
|
||||
default: list
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
- has_more
|
||||
- first_id
|
||||
- last_id
|
||||
- object
|
||||
title: ListOpenAIResponseObject
|
||||
OpenAIResponseObjectWithInput:
|
||||
type: object
|
||||
properties:
|
||||
created_at:
|
||||
type: integer
|
||||
error:
|
||||
$ref: '#/components/schemas/OpenAIResponseError'
|
||||
id:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: response
|
||||
default: response
|
||||
output:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseOutput'
|
||||
parallel_tool_calls:
|
||||
type: boolean
|
||||
default: false
|
||||
previous_response_id:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
temperature:
|
||||
type: number
|
||||
top_p:
|
||||
type: number
|
||||
truncation:
|
||||
type: string
|
||||
user:
|
||||
type: string
|
||||
input:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInput'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
- id
|
||||
- model
|
||||
- object
|
||||
- output
|
||||
- parallel_tool_calls
|
||||
- status
|
||||
- input
|
||||
title: OpenAIResponseObjectWithInput
|
||||
ListProvidersResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -8084,6 +8444,10 @@ components:
|
|||
placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk
|
||||
content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent:
|
||||
{chunk.content}\nMetadata: {metadata}\n"
|
||||
mode:
|
||||
type: string
|
||||
description: >-
|
||||
Search mode for retrieval—either "vector" or "keyword". Default "vector".
|
||||
additionalProperties: false
|
||||
required:
|
||||
- query_generator_config
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -3,10 +3,10 @@
|
|||
Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).
|
||||
|
||||
## Render locally
|
||||
|
||||
From the llama-stack root directory, run the following command to render the docs locally:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
cd docs
|
||||
python -m sphinx_autobuild source _build
|
||||
uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all
|
||||
```
|
||||
You can open up the docs in your browser at http://localhost:8000
|
||||
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
linkify
|
||||
myst-parser
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
|
||||
sphinx==8.1.3
|
||||
sphinx-copybutton
|
||||
sphinx-design
|
||||
sphinx-pdj-theme
|
||||
sphinx-rtd-theme>=1.0.0
|
||||
sphinx-tabs
|
||||
sphinx_autobuild
|
||||
sphinx_rtd_dark_mode
|
||||
sphinxcontrib-mermaid
|
||||
sphinxcontrib-openapi
|
||||
sphinxcontrib-redoc
|
||||
sphinxcontrib-video
|
||||
tomli
|
|
@ -22,7 +22,11 @@ from docutils import nodes
|
|||
# Read version from pyproject.toml
|
||||
with Path(__file__).parent.parent.parent.joinpath("pyproject.toml").open("rb") as f:
|
||||
pypi_url = "https://pypi.org/pypi/llama-stack/json"
|
||||
version_tag = json.loads(requests.get(pypi_url).text)["info"]["version"]
|
||||
headers = {
|
||||
'User-Agent': 'pip/23.0.1 (python 3.11)', # Mimic pip's user agent
|
||||
'Accept': 'application/json'
|
||||
}
|
||||
version_tag = json.loads(requests.get(pypi_url, headers=headers).text)["info"]["version"]
|
||||
print(f"{version_tag=}")
|
||||
|
||||
# generate the full link including text and url here
|
||||
|
@ -53,14 +57,6 @@ myst_enable_extensions = ["colon_fence"]
|
|||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_use_relative_paths = True
|
||||
|
||||
# html_theme = "sphinx_pdj_theme"
|
||||
# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()]
|
||||
|
||||
# html_theme = "pytorch_sphinx_theme"
|
||||
# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
|
||||
|
||||
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
|
||||
|
||||
|
|
|
@ -338,6 +338,48 @@ INFO: Application startup complete.
|
|||
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
|
||||
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
|
||||
```
|
||||
### Listing Distributions
|
||||
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
|
||||
|
||||
```
|
||||
llama stack list -h
|
||||
usage: llama stack list [-h]
|
||||
|
||||
list the build stacks
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
```
|
||||
|
||||
Example Usage
|
||||
|
||||
```
|
||||
llama stack list
|
||||
```
|
||||
|
||||
### Removing a Distribution
|
||||
Use the remove command to delete a distribution you've previously built.
|
||||
|
||||
```
|
||||
llama stack rm -h
|
||||
usage: llama stack rm [-h] [--all] [name]
|
||||
|
||||
Remove the build stack
|
||||
|
||||
positional arguments:
|
||||
name Name of the stack to delete (default: None)
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
--all, -a Delete all stacks (use with caution) (default: False)
|
||||
```
|
||||
|
||||
Example
|
||||
```
|
||||
llama stack rm llamastack-test
|
||||
```
|
||||
|
||||
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
|
|
|
@ -118,11 +118,6 @@ server:
|
|||
port: 8321 # Port to listen on (default: 8321)
|
||||
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
|
||||
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
|
||||
auth: # Optional: Authentication configuration
|
||||
provider_type: "kubernetes" # Type of auth provider
|
||||
config: # Provider-specific configuration
|
||||
api_server_url: "https://kubernetes.default.svc"
|
||||
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
|
||||
```
|
||||
|
||||
### Authentication Configuration
|
||||
|
@ -135,7 +130,7 @@ Authorization: Bearer <token>
|
|||
|
||||
The server supports multiple authentication providers:
|
||||
|
||||
#### Kubernetes Provider
|
||||
#### OAuth 2.0/OpenID Connect Provider with Kubernetes
|
||||
|
||||
The Kubernetes cluster must be configured to use a service account for authentication.
|
||||
|
||||
|
@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se
|
|||
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
|
||||
```
|
||||
|
||||
Validates tokens against the Kubernetes API server:
|
||||
Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests
|
||||
and that the correct RoleBinding is created to allow the service account to access the necessary
|
||||
resources. If that is not the case, you can create a RoleBinding for the service account to access
|
||||
the necessary resources:
|
||||
|
||||
```yaml
|
||||
# allow-anonymous-openid.yaml
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: allow-anonymous-openid
|
||||
rules:
|
||||
- nonResourceURLs: ["/openid/v1/jwks"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: allow-anonymous-openid
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: allow-anonymous-openid
|
||||
subjects:
|
||||
- kind: User
|
||||
name: system:anonymous
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
```
|
||||
|
||||
And then apply the configuration:
|
||||
```bash
|
||||
kubectl apply -f allow-anonymous-openid.yaml
|
||||
```
|
||||
|
||||
Validates tokens against the Kubernetes API server through the OIDC provider:
|
||||
```yaml
|
||||
server:
|
||||
auth:
|
||||
provider_type: "kubernetes"
|
||||
provider_type: "oauth2_token"
|
||||
config:
|
||||
api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server
|
||||
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
|
||||
jwks:
|
||||
uri: "https://kubernetes.default.svc"
|
||||
key_recheck_period: 3600
|
||||
tls_cafile: "/path/to/ca.crt"
|
||||
issuer: "https://kubernetes.default.svc"
|
||||
audience: "https://kubernetes.default.svc"
|
||||
```
|
||||
|
||||
To find your cluster's audience, run:
|
||||
```bash
|
||||
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
|
||||
```
|
||||
|
||||
For the issuer, you can use the OIDC provider's URL:
|
||||
```bash
|
||||
kubectl get --raw /.well-known/openid-configuration| jq .issuer
|
||||
```
|
||||
|
||||
For the tls_cafile, you can use the CA certificate of the OIDC provider:
|
||||
```bash
|
||||
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
|
||||
```
|
||||
|
||||
The provider extracts user information from the JWT token:
|
||||
|
@ -208,6 +256,80 @@ And must respond with:
|
|||
|
||||
If no access attributes are returned, the token is used as a namespace.
|
||||
|
||||
### Quota Configuration
|
||||
|
||||
The `quota` section allows you to enable server-side request throttling for both
|
||||
authenticated and anonymous clients. This is useful for preventing abuse, enforcing
|
||||
fairness across tenants, and controlling infrastructure costs without requiring
|
||||
client-side rate limiting or external proxies.
|
||||
|
||||
Quotas are disabled by default. When enabled, each client is tracked using either:
|
||||
|
||||
* Their authenticated `client_id` (derived from the Bearer token), or
|
||||
* Their IP address (fallback for anonymous requests)
|
||||
|
||||
Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
|
||||
within a configurable time window (currently only `day` is supported).
|
||||
|
||||
#### Example
|
||||
|
||||
```yaml
|
||||
server:
|
||||
quota:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ./quotas.db
|
||||
anonymous_max_requests: 100
|
||||
authenticated_max_requests: 1000
|
||||
period: day
|
||||
```
|
||||
|
||||
#### Configuration Options
|
||||
|
||||
| Field | Description |
|
||||
| ---------------------------- | -------------------------------------------------------------------------- |
|
||||
| `kvstore` | Required. Backend storage config for tracking request counts. |
|
||||
| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. |
|
||||
| `kvstore.db_path` | File path to the SQLite database. |
|
||||
| `anonymous_max_requests` | Max requests per period for unauthenticated clients. |
|
||||
| `authenticated_max_requests` | Max requests per period for authenticated clients. |
|
||||
| `period` | Time window for quota enforcement. Only `"day"` is supported. |
|
||||
|
||||
> Note: if `authenticated_max_requests` is set but no authentication provider is
|
||||
configured, the server will fall back to applying `anonymous_max_requests` to all
|
||||
clients.
|
||||
|
||||
#### Example with Authentication Enabled
|
||||
|
||||
```yaml
|
||||
server:
|
||||
port: 8321
|
||||
auth:
|
||||
provider_type: custom
|
||||
config:
|
||||
endpoint: https://auth.example.com/validate
|
||||
quota:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
db_path: ./quotas.db
|
||||
anonymous_max_requests: 100
|
||||
authenticated_max_requests: 1000
|
||||
period: day
|
||||
```
|
||||
|
||||
If a client exceeds their limit, the server responds with:
|
||||
|
||||
```http
|
||||
HTTP/1.1 429 Too Many Requests
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"error": {
|
||||
"message": "Quota exceeded"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Extending to handle Safety
|
||||
|
||||
Configuring Safety can be a little involved so it is instructive to go through an example.
|
||||
|
|
|
@ -17,7 +17,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
|
|||
|-----|-------------|
|
||||
| agents | `inline::meta-reference` |
|
||||
| inference | `remote::sambanova`, `inline::sentence-transformers` |
|
||||
| safety | `inline::llama-guard` |
|
||||
| safety | `remote::sambanova` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol`, `remote::wolfram-alpha` |
|
||||
| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||
|
@ -48,33 +48,44 @@ The following models are available by default:
|
|||
|
||||
### Prerequisite: API Keys
|
||||
|
||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://sambanova.ai/).
|
||||
Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](http://cloud.sambanova.ai?utm_source=llamastack&utm_medium=external&utm_campaign=cloud_signup).
|
||||
|
||||
|
||||
## Running Llama Stack with SambaNova
|
||||
|
||||
You can do this via Conda (build code) or Docker which has a pre-built image.
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
### Via Docker
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=8321
|
||||
llama stack build --template sambanova --image-type container
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
llamastack/distribution-sambanova \
|
||||
-v ~/.llama:/root/.llama \
|
||||
distribution-sambanova \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
|
||||
```
|
||||
|
||||
|
||||
### Via Venv
|
||||
|
||||
```bash
|
||||
llama stack build --template sambanova --image-type venv
|
||||
llama stack run --image-type venv ~/.llama/distributions/sambanova/sambanova-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
|
||||
```
|
||||
|
||||
|
||||
### Via Conda
|
||||
|
||||
```bash
|
||||
llama stack build --template sambanova --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
llama stack run --image-type conda ~/.llama/distributions/sambanova/sambanova-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env SAMBANOVA_API_KEY=$SAMBANOVA_API_KEY
|
||||
```
|
||||
|
|
|
@ -66,6 +66,25 @@ To use sqlite-vec in your Llama Stack project, follow these steps:
|
|||
2. Configure your Llama Stack project to use SQLite-Vec.
|
||||
3. Start storing and querying vectors.
|
||||
|
||||
## Supported Search Modes
|
||||
|
||||
The sqlite-vec provider supports both vector-based and keyword-based (full-text) search modes.
|
||||
|
||||
When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in
|
||||
`RAGQueryConfig`. For example:
|
||||
|
||||
```python
|
||||
from llama_stack.apis.tool_runtime.rag import RAGQueryConfig
|
||||
|
||||
query_config = RAGQueryConfig(max_chunks=6, mode="vector")
|
||||
|
||||
results = client.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[vector_db_id],
|
||||
content="what is torchtune",
|
||||
query_config=query_config,
|
||||
)
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
You can install SQLite-Vec using pip:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue