Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-09 13:14:39 +00:00
feat: Adding OpenAI Compatible Prompts API

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

parent 30117dea22
commit 8b00883abd
181 changed files with 21356 additions and 10332 deletions
423 docs/_static/llama-stack-spec.html (vendored)
@@ -633,6 +633,80 @@
}
}
},
"/v1/prompts": {
"get": {
"responses": {
"200": {
"description": "A ListPromptsResponse containing all prompts.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListPromptsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "List all prompts.",
"parameters": []
},
"post": {
"responses": {
"200": {
"description": "The created Prompt resource.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Prompt"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "Create a new prompt.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreatePromptRequest"
}
}
},
"required": true
}
}
},
"/v1/agents/{agent_id}": {
"get": {
"responses": {
@@ -901,6 +975,143 @@
]
}
},
"/v1/prompts/{prompt_id}": {
"get": {
"responses": {
"200": {
"description": "A Prompt resource.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Prompt"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "Get a prompt by its identifier and optional version.",
"parameters": [
{
"name": "prompt_id",
"in": "path",
"description": "The identifier of the prompt to get.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "version",
"in": "query",
"description": "The version of the prompt to get (defaults to latest).",
"required": false,
"schema": {
"type": "string"
}
}
]
},
"post": {
"responses": {
"200": {
"description": "The updated Prompt resource with incremented version.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Prompt"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "Update an existing prompt (increments version).",
"parameters": [
{
"name": "prompt_id",
"in": "path",
"description": "The identifier of the prompt to update.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/UpdatePromptRequest"
}
}
},
"required": true
}
},
"delete": {
"responses": {
"200": {
"description": "OK"
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "Delete a prompt.",
"parameters": [
{
"name": "prompt_id",
"in": "path",
"description": "The identifier of the prompt to delete.",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/inference/embeddings": {
"post": {
"responses": {
@@ -4129,7 +4340,7 @@
"tags": [
"Files"
],
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.",
"description": "Upload a file that can be used across various endpoints.\nThe file upload should be a multipart form request with:\n- file: The File object (not file name) to be uploaded.\n- purpose: The intended purpose of the uploaded file.\n- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = \"created_at\", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).",
"parameters": [],
"requestBody": {
"content": {
@@ -4143,11 +4354,33 @@
},
"purpose": {
"$ref": "#/components/schemas/OpenAIFilePurpose"
},
"expires_after_anchor": {
"oneOf": [
{
"type": "string"
},
{
"type": "null"
}
]
},
"expires_after_seconds": {
"oneOf": [
{
"type": "integer"
},
{
"type": "null"
}
]
}
},
"required": [
"file",
"purpose"
"purpose",
"expires_after_anchor",
"expires_after_seconds"
]
}
}
@@ -4985,6 +5218,59 @@
}
}
},
"/v1/prompts/{prompt_id}/default-version": {
"post": {
"responses": {
"200": {
"description": "The prompt with the specified version now set as default.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Prompt"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Prompts"
],
"description": "Set which version of a prompt should be the default in get_prompt (latest).",
"parameters": [
{
"name": "prompt_id",
"in": "path",
"description": "The identifier of the prompt.",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SetDefaultVersionRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": {
"post": {
"responses": {
@@ -9648,6 +9934,58 @@
],
"title": "OpenAIResponseObjectStreamResponseWebSearchCallSearching"
},
"CreatePromptRequest": {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "The prompt text content with variable placeholders."
},
"variables": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Dictionary of variable names to their default values."
}
},
"additionalProperties": false,
"required": [
"prompt"
],
"title": "CreatePromptRequest"
},
"Prompt": {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "The system prompt text with variable placeholders. Variables are only supported when using the Responses API."
},
"version": {
"type": "string",
"description": "Version string (integer start at 1 cast as string, incremented on save)"
},
"prompt_id": {
"type": "string",
"description": "Unique identifier formatted as 'pmpt_<48-digit-hash>'"
},
"variables": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Dictionary of prompt variable names and values"
}
},
"additionalProperties": false,
"required": [
"version",
"prompt_id"
],
"title": "Prompt",
"description": "A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack."
},
"OpenAIDeleteResponseObject": {
"type": "object",
"properties": {
@@ -10274,7 +10612,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "benchmark",
"default": "benchmark",
@@ -10901,7 +11240,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "dataset",
"default": "dataset",
@@ -11051,7 +11391,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "model",
"default": "model",
@@ -11316,7 +11657,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "scoring_function",
"default": "scoring_function",
@@ -11424,7 +11766,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "shield",
"default": "shield",
@@ -11669,7 +12012,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "tool",
"default": "tool",
@@ -11751,7 +12095,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "tool_group",
"default": "tool_group",
@@ -12045,7 +12390,8 @@
"scoring_function",
"benchmark",
"tool",
"tool_group"
"tool_group",
"prompt"
],
"const": "vector_db",
"default": "vector_db",
@@ -12860,6 +13206,23 @@
"title": "OpenAIResponseObjectWithInput",
"description": "OpenAI response object extended with input context information."
},
"ListPromptsResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Prompt"
}
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListPromptsResponse",
"description": "Response model to list prompts."
},
"ListProvidersResponse": {
"type": "object",
"properties": {
@@ -17106,6 +17469,20 @@
"title": "ScoreBatchResponse",
"description": "Response from batch scoring operations on datasets."
},
"SetDefaultVersionRequest": {
"type": "object",
"properties": {
"version": {
"type": "string",
"description": "The version to set as default."
}
},
"additionalProperties": false,
"required": [
"version"
],
"title": "SetDefaultVersionRequest"
},
"AlgorithmConfig": {
"oneOf": [
{
@@ -17390,6 +17767,27 @@
"title": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
},
"UpdatePromptRequest": {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "The updated prompt text content."
},
"variables": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Updated dictionary of variable names to their default values."
}
},
"additionalProperties": false,
"required": [
"prompt"
],
"title": "UpdatePromptRequest"
},
"VersionInfo": {
"type": "object",
"properties": {
@@ -17515,6 +17913,10 @@
{
"name": "PostTraining (Coming Soon)"
},
{
"name": "Prompts",
"x-displayName": "Protocol for prompt management operations."
},
{
"name": "Providers",
"x-displayName": "Providers API for inspecting, listing, and modifying providers and their configurations."
@@ -17565,6 +17967,7 @@
"Inspect",
"Models",
"PostTraining (Coming Soon)",
"Prompts",
"Providers",
"Safety",
"Scoring",
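The endpoints added above form a small CRUD surface for prompt templates. Below is a minimal sketch of exercising them over plain HTTP; the paths, request fields, and response fields mirror the spec hunks above, while the base URL and port, the use of `httpx`, and the `{{ style }}` placeholder syntax are illustrative assumptions rather than anything this commit prescribes.

```python
# Sketch only: assumes a Llama Stack server reachable at localhost:8321.
import httpx

BASE = "http://localhost:8321/v1"

with httpx.Client(base_url=BASE) as client:
    # Create a prompt; per CreatePromptRequest, "variables" maps variable
    # names to their default values.
    created = client.post(
        "/prompts",
        json={
            "prompt": "Summarize the following text in {{ style }} style.",  # placeholder syntax assumed
            "variables": {"style": "bullet points"},
        },
    ).json()
    prompt_id = created["prompt_id"]  # formatted as 'pmpt_<48-digit-hash>'

    # List all prompts, then fetch this one (optionally pinning a version).
    client.get("/prompts")
    client.get(f"/prompts/{prompt_id}", params={"version": created["version"]})

    # Update it (the server increments the version), then make that version
    # the default returned by subsequent GETs.
    updated = client.post(
        f"/prompts/{prompt_id}",
        json={"prompt": "Summarize the text below in {{ style }} style."},
    ).json()
    client.post(
        f"/prompts/{prompt_id}/default-version",
        json={"version": updated["version"]},
    )

    # Delete when done.
    client.delete(f"/prompts/{prompt_id}")
```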
297 docs/_static/llama-stack-spec.yaml (vendored)
@@ -427,6 +427,58 @@ paths:
schema:
$ref: '#/components/schemas/CreateOpenaiResponseRequest'
required: true
/v1/prompts:
get:
responses:
'200':
description: >-
A ListPromptsResponse containing all prompts.
content:
application/json:
schema:
$ref: '#/components/schemas/ListPromptsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: List all prompts.
parameters: []
post:
responses:
'200':
description: The created Prompt resource.
content:
application/json:
schema:
$ref: '#/components/schemas/Prompt'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: Create a new prompt.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreatePromptRequest'
required: true
/v1/agents/{agent_id}:
get:
responses:
@@ -616,6 +668,103 @@ paths:
required: true
schema:
type: string
/v1/prompts/{prompt_id}:
get:
responses:
'200':
description: A Prompt resource.
content:
application/json:
schema:
$ref: '#/components/schemas/Prompt'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: >-
Get a prompt by its identifier and optional version.
parameters:
- name: prompt_id
in: path
description: The identifier of the prompt to get.
required: true
schema:
type: string
- name: version
in: query
description: >-
The version of the prompt to get (defaults to latest).
required: false
schema:
type: string
post:
responses:
'200':
description: >-
The updated Prompt resource with incremented version.
content:
application/json:
schema:
$ref: '#/components/schemas/Prompt'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: >-
Update an existing prompt (increments version).
parameters:
- name: prompt_id
in: path
description: The identifier of the prompt to update.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/UpdatePromptRequest'
required: true
delete:
responses:
'200':
description: OK
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: Delete a prompt.
parameters:
- name: prompt_id
in: path
description: The identifier of the prompt to delete.
required: true
schema:
type: string
/v1/inference/embeddings:
post:
responses:
@@ -2933,6 +3082,10 @@ paths:
- file: The File object (not file name) to be uploaded.

- purpose: The intended purpose of the uploaded file.

- expires_after: Optional form values describing expiration for the file.
Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>.
Seconds must be between 3600 and 2592000 (1 hour to 30 days).
parameters: []
requestBody:
content:
@@ -2945,9 +3098,19 @@ paths:
format: binary
purpose:
$ref: '#/components/schemas/OpenAIFilePurpose'
expires_after_anchor:
oneOf:
- type: string
- type: 'null'
expires_after_seconds:
oneOf:
- type: integer
- type: 'null'
required:
- file
- purpose
- expires_after_anchor
- expires_after_seconds
required: true
/v1/openai/v1/models:
get:
@@ -3532,6 +3695,43 @@ paths:
schema:
$ref: '#/components/schemas/ScoreBatchRequest'
required: true
/v1/prompts/{prompt_id}/default-version:
post:
responses:
'200':
description: >-
The prompt with the specified version now set as default.
content:
application/json:
schema:
$ref: '#/components/schemas/Prompt'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Prompts
description: >-
Set which version of a prompt should be the default in get_prompt (latest).
parameters:
- name: prompt_id
in: path
description: The identifier of the prompt.
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SetDefaultVersionRequest'
required: true
/v1/post-training/supervised-fine-tune:
post:
responses:
@@ -7134,6 +7334,53 @@ components:
- type
title: >-
OpenAIResponseObjectStreamResponseWebSearchCallSearching
CreatePromptRequest:
type: object
properties:
prompt:
type: string
description: >-
The prompt text content with variable placeholders.
variables:
type: object
additionalProperties:
type: string
description: >-
Dictionary of variable names to their default values.
additionalProperties: false
required:
- prompt
title: CreatePromptRequest
Prompt:
type: object
properties:
prompt:
type: string
description: >-
The system prompt text with variable placeholders. Variables are only
supported when using the Responses API.
version:
type: string
description: >-
Version string (integer start at 1 cast as string, incremented on save)
prompt_id:
type: string
description: >-
Unique identifier formatted as 'pmpt_<48-digit-hash>'
variables:
type: object
additionalProperties:
type: string
description: >-
Dictionary of prompt variable names and values
additionalProperties: false
required:
- version
- prompt_id
title: Prompt
description: >-
A prompt resource representing a stored OpenAI Compatible prompt template
in Llama Stack.
OpenAIDeleteResponseObject:
type: object
properties:
@@ -7607,6 +7854,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: benchmark
default: benchmark
description: The resource type, always benchmark
@@ -8093,6 +8341,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: dataset
default: dataset
description: >-
@@ -8205,6 +8454,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: model
default: model
description: >-
@@ -8396,6 +8646,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: scoring_function
default: scoring_function
description: >-
@@ -8472,6 +8723,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: shield
default: shield
description: The resource type, always shield
@@ -8651,6 +8903,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: tool
default: tool
description: Type of resource, always 'tool'
@@ -8709,6 +8962,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: tool_group
default: tool_group
description: Type of resource, always 'tool_group'
@@ -8937,6 +9191,7 @@ components:
- benchmark
- tool
- tool_group
- prompt
const: vector_db
default: vector_db
description: >-
@@ -9563,6 +9818,18 @@ components:
title: OpenAIResponseObjectWithInput
description: >-
OpenAI response object extended with input context information.
ListPromptsResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/Prompt'
additionalProperties: false
required:
- data
title: ListPromptsResponse
description: Response model to list prompts.
ListProvidersResponse:
type: object
properties:
@@ -12708,6 +12975,16 @@ components:
title: ScoreBatchResponse
description: >-
Response from batch scoring operations on datasets.
SetDefaultVersionRequest:
type: object
properties:
version:
type: string
description: The version to set as default.
additionalProperties: false
required:
- version
title: SetDefaultVersionRequest
AlgorithmConfig:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
@@ -12904,6 +13181,22 @@ components:
description: >-
Response from the synthetic data generation. Batch of (prompt, response, score)
tuples that pass the threshold.
UpdatePromptRequest:
type: object
properties:
prompt:
type: string
description: The updated prompt text content.
variables:
type: object
additionalProperties:
type: string
description: >-
Updated dictionary of variable names to their default values.
additionalProperties: false
required:
- prompt
title: UpdatePromptRequest
VersionInfo:
type: object
properties:
@@ -13015,6 +13308,9 @@ tags:
- name: Inspect
- name: Models
- name: PostTraining (Coming Soon)
- name: Prompts
x-displayName: >-
Protocol for prompt management operations.
- name: Providers
x-displayName: >-
Providers API for inspecting, listing, and modifying providers and their configurations.
@@ -13042,6 +13338,7 @@ x-tagGroups:
- Inspect
- Models
- PostTraining (Coming Soon)
- Prompts
- Providers
- Safety
- Scoring
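The file-upload hunks in both spec files add two multipart form fields, expires_after_anchor and expires_after_seconds. A minimal sketch of the resulting request is below; the upload route itself is not shown in this excerpt, so the path (the OpenAI-compatible files endpoint), the "assistants" purpose value, and the host/port are assumptions, while the two expires_after_* fields follow the schema change above (anchor "created_at", seconds between 3600 and 2592000).

```python
# Sketch only: assumed server address and upload path.
import httpx

resp = httpx.post(
    "http://localhost:8321/v1/openai/v1/files",  # assumed path, not shown in this diff excerpt
    files={"file": ("notes.txt", b"some text to index", "text/plain")},
    data={
        "purpose": "assistants",               # an OpenAIFilePurpose value (assumed)
        "expires_after_anchor": "created_at",  # only "created_at" is expected per the description
        "expires_after_seconds": "86400",      # one day, within the 3600..2592000 range
    },
)
print(resp.json())
```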
@@ -40,18 +40,15 @@ The system patches OpenAI and Ollama client methods to intercept calls before th

### Storage Architecture

Recordings use a two-tier storage system optimized for both speed and debuggability:
Recordings are stored as JSON files in the recording directory. They are looked up by their request hash.

```
recordings/
├── index.sqlite # Fast lookup by request hash
└── responses/
├── abc123def456.json # Individual response files
└── def789ghi012.json
```

**SQLite index** enables O(log n) hash lookups and metadata queries without loading response bodies.

**JSON files** store complete request/response pairs in human-readable format for debugging.

## Recording Modes
@@ -166,8 +163,8 @@ This preserves type safety - when replayed, you get the same Pydantic objects wi
Control recording behavior globally:

```bash
export LLAMA_STACK_TEST_INFERENCE_MODE=replay
export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings
export LLAMA_STACK_TEST_INFERENCE_MODE=replay # this is the default
export LLAMA_STACK_TEST_RECORDING_DIR=/path/to/recordings # default is tests/integration/recordings
pytest tests/integration/
```
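The new recording docs above say replay works by looking a recording up by its request hash in the recording directory. A rough sketch of that lookup idea is shown here; the actual hash inputs and file layout are defined by the test harness, so the canonical-JSON SHA-256 digest and filename scheme below are only illustrative.

```python
# Illustrative only: the real hashing scheme lives in the test recording code.
import hashlib
import json
from pathlib import Path

def find_recording(recordings_dir: str, request: dict) -> dict | None:
    # Hash a canonicalized form of the request, then look for a matching JSON file.
    digest = hashlib.sha256(
        json.dumps(request, sort_keys=True).encode("utf-8")
    ).hexdigest()
    path = Path(recordings_dir) / f"{digest}.json"
    return json.loads(path.read_text()) if path.exists() else None
```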
@@ -50,6 +50,7 @@ The following models are available by default:
- `meta/llama-3.2-11b-vision-instruct `
- `meta/llama-3.2-90b-vision-instruct `
- `meta/llama-3.3-70b-instruct `
- `nvidia/vila `
- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
- `nvidia/nv-embedqa-e5-v5 `
- `nvidia/nv-embedqa-mistral-7b-v2 `
@@ -12,6 +12,60 @@ That means you'll get fast and efficient vector retrieval.
- Easy to use
- Fully integrated with Llama Stack

There are three implementations of search available for PGVectorIndex:

1. Vector Search:
- How it works:
- Uses PostgreSQL's vector extension (pgvector) to perform similarity search
- Compares query embeddings against stored embeddings using cosine distance or other distance metrics
- E.g. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance

- Characteristics:
- Semantic understanding - finds documents similar in meaning even if they don't share keywords
- Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
- Best for: Finding conceptually related content, handling synonyms, cross-language search

2. Keyword Search
- How it works:
- Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank
- Converts text to searchable tokens using to_tsvector('english', text). Default language is English.
- E.g. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score

- Characteristics:
- Lexical matching - finds exact keyword matches and variations
- Uses GIN (Generalized Inverted Index) for fast text search performance
- Scoring: Uses PostgreSQL's ts_rank function for relevance scoring
- Best for: Exact term matching, proper names, technical terms, Boolean-style queries

3. Hybrid Search
- How it works:
- Combines both vector and keyword search results
- Runs both searches independently, then merges results using configurable reranking

- Two reranking strategies available:
- Reciprocal Rank Fusion (RRF) - (default: 60.0)
- Weighted Average - (default: 0.5)

- Characteristics:
- Best of both worlds: semantic understanding + exact matching
- Documents appearing in both searches get boosted scores
- Configurable balance between semantic and lexical matching
- Best for: General-purpose search where you want both precision and recall

4. Database Schema
The PGVector implementation stores data optimized for all three search types:
CREATE TABLE vector_store_xxx (
id TEXT PRIMARY KEY,
document JSONB, -- Original document
embedding vector(dimension), -- For vector search
content_text TEXT, -- Raw text content
tokenized_content TSVECTOR -- For keyword search
);

-- Indexes for performance
CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
-- Vector index created automatically by pgvector

## Usage

To use PGVector in your Llama Stack project, follow these steps:
@@ -20,6 +74,25 @@ To use PGVector in your Llama Stack project, follow these steps:
2. Configure your Llama Stack project to use pgvector (e.g. remote::pgvector).
3. Start storing and querying vectors.

## Example: setting up your environment for PGVector

1. Export env vars:
```bash
export ENABLE_PGVECTOR=true
export PGVECTOR_HOST=localhost
export PGVECTOR_PORT=5432
export PGVECTOR_DB=llamastack
export PGVECTOR_USER=llamastack
export PGVECTOR_PASSWORD=llamastack
```

2. Create DB:
```bash
psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
```

## Installation

You can install PGVector using docker:
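The cosine-distance query quoted in the PGVector notes above can be exercised directly. Below is a minimal sketch using psycopg 3 with the connection settings from the env-var example; the vector_store_xxx table name follows the schema sketch in those notes, and the query embedding is a placeholder you would normally obtain from your embedding model.

```python
# Sketch only: table name and embedding values are illustrative.
import psycopg

query_embedding = [0.01] * 768  # placeholder embedding vector
vector_literal = "[" + ",".join(str(x) for x in query_embedding) + "]"

with psycopg.connect(
    "postgresql://llamastack:llamastack@localhost:5432/llamastack"
) as conn:
    rows = conn.execute(
        """
        SELECT document, embedding <=> %s::vector AS distance
        FROM vector_store_xxx
        ORDER BY distance
        LIMIT 5
        """,
        (vector_literal,),
    ).fetchall()
    for document, distance in rows:
        print(distance, document)
```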
@@ -17,6 +17,7 @@ Weaviate supports:
- Metadata filtering
- Multi-modal retrieval


## Usage

To use Weaviate in your Llama Stack project, follow these steps:
@@ -478,7 +478,6 @@ llama-stack-client scoring_functions list
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ description ┃ type ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ basic::bfcl │ basic │ BFCL complex scoring │ scoring_function │
│ basic::docvqa │ basic │ DocVQA Visual Question & Answer scoring function │ scoring_function │
│ basic::equality │ basic │ Returns 1.0 if the input is equal to the target, 0.0 │ scoring_function │
│ │ │ otherwise. │ │