mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-20 19:56:59 +00:00
Merge branch 'main' into nvidia-e2e-notebook
This commit is contained in:
commit
012dd6891f
96 changed files with 4675 additions and 426 deletions
512
docs/_static/llama-stack-spec.html
vendored
512
docs/_static/llama-stack-spec.html
vendored
|
@ -497,6 +497,54 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/responses": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Runtime representation of an annotated type.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObject"
|
||||
}
|
||||
},
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStream"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "Create a new OpenAI response.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateOpenaiResponseRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/files": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -1278,6 +1326,49 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/responses/{id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "An OpenAIResponseObject.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObject"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "Retrieve an OpenAI response by its ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"description": "The ID of the OpenAI response to retrieve.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/scoring-functions/{scoring_fn_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -6192,6 +6283,427 @@
|
|||
],
|
||||
"title": "AgentTurnResponseTurnStartPayload"
|
||||
},
|
||||
"OpenAIResponseInputMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"role": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"const": "system"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "developer"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "user"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "assistant"
|
||||
}
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "message",
|
||||
"default": "message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"role"
|
||||
],
|
||||
"title": "OpenAIResponseInputMessage"
|
||||
},
|
||||
"OpenAIResponseInputMessageContent": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentText"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
|
||||
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseInputMessageContentImage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"detail": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"const": "low"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "high"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "auto"
|
||||
}
|
||||
],
|
||||
"default": "auto"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "input_image",
|
||||
"default": "input_image"
|
||||
},
|
||||
"image_url": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"detail",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseInputMessageContentImage"
|
||||
},
|
||||
"OpenAIResponseInputMessageContentText": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "input_text",
|
||||
"default": "input_text"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"text",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseInputMessageContentText"
|
||||
},
|
||||
"OpenAIResponseInputTool": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"const": "web_search"
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"const": "web_search_preview_2025_03_11"
|
||||
}
|
||||
],
|
||||
"default": "web_search"
|
||||
},
|
||||
"search_context_size": {
|
||||
"type": "string",
|
||||
"default": "medium"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseInputToolWebSearch"
|
||||
},
|
||||
"CreateOpenaiResponseRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessage"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Input message(s) to create the response."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The underlying LLM used for completions."
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses."
|
||||
},
|
||||
"store": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputTool"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input",
|
||||
"model"
|
||||
],
|
||||
"title": "CreateOpenaiResponseRequest"
|
||||
},
|
||||
"OpenAIResponseError": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"code",
|
||||
"message"
|
||||
],
|
||||
"title": "OpenAIResponseError"
|
||||
},
|
||||
"OpenAIResponseObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseError"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "response",
|
||||
"default": "response"
|
||||
},
|
||||
"output": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutput"
|
||||
}
|
||||
},
|
||||
"parallel_tool_calls": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
},
|
||||
"previous_response_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number"
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number"
|
||||
},
|
||||
"truncation": {
|
||||
"type": "string"
|
||||
},
|
||||
"user": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"created_at",
|
||||
"id",
|
||||
"model",
|
||||
"object",
|
||||
"output",
|
||||
"parallel_tool_calls",
|
||||
"status"
|
||||
],
|
||||
"title": "OpenAIResponseObject"
|
||||
},
|
||||
"OpenAIResponseOutput": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessage"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"message": "#/components/schemas/OpenAIResponseOutputMessage",
|
||||
"web_search_call": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseOutputMessage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageContent"
|
||||
}
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"const": "assistant",
|
||||
"default": "assistant"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "message",
|
||||
"default": "message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"content",
|
||||
"role",
|
||||
"status",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessage"
|
||||
},
|
||||
"OpenAIResponseOutputMessageContent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "output_text",
|
||||
"default": "output_text"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"text",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessageContentOutputText"
|
||||
},
|
||||
"OpenAIResponseOutputMessageWebSearchToolCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "web_search_call",
|
||||
"default": "web_search_call"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"status",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseOutputMessageWebSearchToolCall"
|
||||
},
|
||||
"OpenAIResponseObjectStream": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"response.created": "#/components/schemas/OpenAIResponseObjectStreamResponseCreated",
|
||||
"response.completed": "#/components/schemas/OpenAIResponseObjectStreamResponseCompleted"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseObjectStreamResponseCompleted": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"response": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObject"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "response.completed",
|
||||
"default": "response.completed"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"response",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseCompleted"
|
||||
},
|
||||
"OpenAIResponseObjectStreamResponseCreated": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"response": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseObject"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "response.created",
|
||||
"default": "response.created"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"response",
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseObjectStreamResponseCreated"
|
||||
},
|
||||
"CreateUploadSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
350
docs/_static/llama-stack-spec.yaml
vendored
350
docs/_static/llama-stack-spec.yaml
vendored
|
@ -330,6 +330,39 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/CreateAgentTurnRequest'
|
||||
required: true
|
||||
/v1/openai/v1/responses:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
An OpenAIResponseObject, or a stream of OpenAIResponseObjectStream events when the response is streamed.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIResponseObject'
|
||||
text/event-stream:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIResponseObjectStream'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: Create a new OpenAI response.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
|
||||
required: true
|
||||
/v1/files:
|
||||
get:
|
||||
responses:
|
||||
|
@ -875,6 +908,36 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/openai/v1/responses/{id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: An OpenAIResponseObject.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIResponseObject'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Agents
|
||||
description: Retrieve an OpenAI response by its ID.
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the OpenAI response to retrieve.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/scoring-functions/{scoring_fn_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -4329,6 +4392,293 @@ components:
|
|||
- event_type
|
||||
- turn_id
|
||||
title: AgentTurnResponseTurnStartPayload
|
||||
OpenAIResponseInputMessage:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
|
||||
role:
|
||||
oneOf:
|
||||
- type: string
|
||||
const: system
|
||||
- type: string
|
||||
const: developer
|
||||
- type: string
|
||||
const: user
|
||||
- type: string
|
||||
const: assistant
|
||||
type:
|
||||
type: string
|
||||
const: message
|
||||
default: message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- role
|
||||
title: OpenAIResponseInputMessage
|
||||
OpenAIResponseInputMessageContent:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
|
||||
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
|
||||
OpenAIResponseInputMessageContentImage:
|
||||
type: object
|
||||
properties:
|
||||
detail:
|
||||
oneOf:
|
||||
- type: string
|
||||
const: low
|
||||
- type: string
|
||||
const: high
|
||||
- type: string
|
||||
const: auto
|
||||
default: auto
|
||||
type:
|
||||
type: string
|
||||
const: input_image
|
||||
default: input_image
|
||||
image_url:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- detail
|
||||
- type
|
||||
title: OpenAIResponseInputMessageContentImage
|
||||
OpenAIResponseInputMessageContentText:
|
||||
type: object
|
||||
properties:
|
||||
text:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: input_text
|
||||
default: input_text
|
||||
additionalProperties: false
|
||||
required:
|
||||
- text
|
||||
- type
|
||||
title: OpenAIResponseInputMessageContentText
|
||||
OpenAIResponseInputTool:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
oneOf:
|
||||
- type: string
|
||||
const: web_search
|
||||
- type: string
|
||||
const: web_search_preview_2025_03_11
|
||||
default: web_search
|
||||
search_context_size:
|
||||
type: string
|
||||
default: medium
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: OpenAIResponseInputToolWebSearch
|
||||
CreateOpenaiResponseRequest:
|
||||
type: object
|
||||
properties:
|
||||
input:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInputMessage'
|
||||
description: Input message(s) to create the response.
|
||||
model:
|
||||
type: string
|
||||
description: The underlying LLM used for completions.
|
||||
previous_response_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) if specified, the new response will be a continuation of the
|
||||
previous response. This can be used to easily fork-off new responses from
|
||||
existing responses.
|
||||
store:
|
||||
type: boolean
|
||||
stream:
|
||||
type: boolean
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseInputTool'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input
|
||||
- model
|
||||
title: CreateOpenaiResponseRequest
|
||||
OpenAIResponseError:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
message:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- code
|
||||
- message
|
||||
title: OpenAIResponseError
|
||||
OpenAIResponseObject:
|
||||
type: object
|
||||
properties:
|
||||
created_at:
|
||||
type: integer
|
||||
error:
|
||||
$ref: '#/components/schemas/OpenAIResponseError'
|
||||
id:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: response
|
||||
default: response
|
||||
output:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseOutput'
|
||||
parallel_tool_calls:
|
||||
type: boolean
|
||||
default: false
|
||||
previous_response_id:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
temperature:
|
||||
type: number
|
||||
top_p:
|
||||
type: number
|
||||
truncation:
|
||||
type: string
|
||||
user:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- created_at
|
||||
- id
|
||||
- model
|
||||
- object
|
||||
- output
|
||||
- parallel_tool_calls
|
||||
- status
|
||||
title: OpenAIResponseObject
|
||||
OpenAIResponseOutput:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessage'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
message: '#/components/schemas/OpenAIResponseOutputMessage'
|
||||
web_search_call: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
OpenAIResponseOutputMessage:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIResponseOutputMessageContent'
|
||||
role:
|
||||
type: string
|
||||
const: assistant
|
||||
default: assistant
|
||||
status:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: message
|
||||
default: message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- content
|
||||
- role
|
||||
- status
|
||||
- type
|
||||
title: OpenAIResponseOutputMessage
|
||||
OpenAIResponseOutputMessageContent:
|
||||
type: object
|
||||
properties:
|
||||
text:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: output_text
|
||||
default: output_text
|
||||
additionalProperties: false
|
||||
required:
|
||||
- text
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseOutputMessageContentOutputText
|
||||
"OpenAIResponseOutputMessageWebSearchToolCall":
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: web_search_call
|
||||
default: web_search_call
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- status
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseOutputMessageWebSearchToolCall
|
||||
OpenAIResponseObjectStream:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
- $ref: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
response.created: '#/components/schemas/OpenAIResponseObjectStreamResponseCreated'
|
||||
response.completed: '#/components/schemas/OpenAIResponseObjectStreamResponseCompleted'
|
||||
"OpenAIResponseObjectStreamResponseCompleted":
|
||||
type: object
|
||||
properties:
|
||||
response:
|
||||
$ref: '#/components/schemas/OpenAIResponseObject'
|
||||
type:
|
||||
type: string
|
||||
const: response.completed
|
||||
default: response.completed
|
||||
additionalProperties: false
|
||||
required:
|
||||
- response
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseCompleted
|
||||
"OpenAIResponseObjectStreamResponseCreated":
|
||||
type: object
|
||||
properties:
|
||||
response:
|
||||
$ref: '#/components/schemas/OpenAIResponseObject'
|
||||
type:
|
||||
type: string
|
||||
const: response.created
|
||||
default: response.created
|
||||
additionalProperties: false
|
||||
required:
|
||||
- response
|
||||
- type
|
||||
title: >-
|
||||
OpenAIResponseObjectStreamResponseCreated
|
||||
CreateUploadSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
907
docs/getting_started_llama_api.ipynb
Normal file
907
docs/getting_started_llama_api.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
@ -179,7 +179,7 @@ class ContentBuilder:
|
|||
"Creates the content subtree for a request or response."
|
||||
|
||||
def is_iterator_type(t):
|
||||
return "StreamChunk" in str(t)
|
||||
return "StreamChunk" in str(t) or "OpenAIResponseObjectStream" in str(t)
|
||||
|
||||
def get_media_type(t):
|
||||
if is_generic_list(t):
|
||||
|
|
|
@ -53,6 +53,13 @@ models:
|
|||
provider_id: ollama
|
||||
provider_model_id: null
|
||||
shields: []
|
||||
server:
|
||||
port: 8321
|
||||
auth:
|
||||
provider_type: "kubernetes"
|
||||
config:
|
||||
api_server_url: "https://kubernetes.default.svc"
|
||||
ca_cert_path: "/path/to/ca.crt"
|
||||
```
|
||||
|
||||
Let's break this down into the different sections. The first section specifies the set of APIs that the stack server will serve:
|
||||
|
@ -102,6 +109,105 @@ A Model is an instance of a "Resource" (see [Concepts](../concepts/index)) and i
|
|||
|
||||
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
|
||||
|
||||
## Server Configuration
|
||||
|
||||
The `server` section configures the HTTP server that serves the Llama Stack APIs:
|
||||
|
||||
```yaml
|
||||
server:
|
||||
port: 8321 # Port to listen on (default: 8321)
|
||||
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
|
||||
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
|
||||
auth: # Optional: Authentication configuration
|
||||
provider_type: "kubernetes" # Type of auth provider
|
||||
config: # Provider-specific configuration
|
||||
api_server_url: "https://kubernetes.default.svc"
|
||||
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
|
||||
```
|
||||
|
||||
### Authentication Configuration
|
||||
|
||||
The `auth` section configures authentication for the server. When configured, all API requests must include a valid Bearer token in the Authorization header:
|
||||
|
||||
```
|
||||
Authorization: Bearer <token>
|
||||
```
|
||||
|
||||
The server supports multiple authentication providers:
|
||||
|
||||
#### Kubernetes Provider
|
||||
|
||||
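The Kubernetes cluster must be configured to use a service account for authentication. For example, the following commands create a namespace, a service account, and a role binding, and then write a token for that service account to the file `llama-stack-auth-token`: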
|
||||
|
||||
```bash
|
||||
kubectl create namespace llama-stack
|
||||
kubectl create serviceaccount llama-stack-auth -n llama-stack
|
||||
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
|
||||
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
|
||||
```
|
||||
|
||||
The Kubernetes provider validates tokens against the Kubernetes API server:
|
||||
```yaml
|
||||
server:
|
||||
auth:
|
||||
provider_type: "kubernetes"
|
||||
config:
|
||||
api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server
|
||||
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
|
||||
```
|
||||
|
||||
The provider extracts user information from the JWT:
|
||||
- Username from the `sub` claim becomes a role
|
||||
- Kubernetes groups become teams
|
||||
|
||||
You can verify that authentication works by sending a request that includes the token:
|
||||
|
||||
```bash
|
||||
curl -s -L -H "Authorization: Bearer $(cat llama-stack-auth-token)" http://127.0.0.1:8321/v1/providers
|
||||
```
|
||||
|
||||
#### Custom Provider
|
||||
The custom provider validates tokens against a custom authentication endpoint:
|
||||
```yaml
|
||||
server:
|
||||
auth:
|
||||
provider_type: "custom"
|
||||
config:
|
||||
endpoint: "https://auth.example.com/validate" # URL of the auth endpoint
|
||||
```
|
||||
|
||||
The custom endpoint receives a POST request with the following JSON body:
|
||||
```json
|
||||
{
|
||||
"api_key": "<token>",
|
||||
"request": {
|
||||
"path": "/api/v1/endpoint",
|
||||
"headers": {
|
||||
"content-type": "application/json",
|
||||
"user-agent": "curl/7.64.1"
|
||||
},
|
||||
"params": {
|
||||
"key": ["value"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The endpoint must respond with a JSON body of the following form:
|
||||
```json
|
||||
{
|
||||
"access_attributes": {
|
||||
"roles": ["admin", "user"],
|
||||
"teams": ["ml-team", "nlp-team"],
|
||||
"projects": ["llama-3", "project-x"],
|
||||
"namespaces": ["research"]
|
||||
},
|
||||
"message": "Authentication successful"
|
||||
}
|
||||
```
|
||||
|
||||
If no access attributes are returned, the token is used as a namespace.
|
||||
|
||||
## Extending to handle Safety
|
||||
|
||||
Configuring Safety can be a little involved so it is instructive to go through an example.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue