mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 13:00:39 +00:00
merge
This commit is contained in:
commit
7676756778
24 changed files with 1363 additions and 443 deletions
816
docs/_static/llama-stack-spec.html
vendored
816
docs/_static/llama-stack-spec.html
vendored
|
@ -554,6 +554,67 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Turn"
|
||||
}
|
||||
},
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "turn_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ContinueAgentTurnRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -678,6 +739,65 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/files": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBucketResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "List all buckets.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "bucket",
|
||||
"in": "query",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/FileUploadResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "Create a new upload session for a file identified by a bucket and key.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateUploadSessionRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}": {
|
||||
"delete": {
|
||||
"responses": {
|
||||
|
@ -779,6 +899,84 @@
|
|||
]
|
||||
}
|
||||
},
|
||||
"/v1/files/{bucket}/{key}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/FileResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "Get info for a file identified by a bucket and key.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "bucket",
|
||||
"in": "path",
|
||||
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "key",
|
||||
"in": "path",
|
||||
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/FileResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "Delete a file identified by a bucket and key.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "bucket",
|
||||
"in": "path",
|
||||
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "key",
|
||||
"in": "path",
|
||||
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/inference/embeddings": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -1470,6 +1668,91 @@
|
|||
"parameters": []
|
||||
}
|
||||
},
|
||||
"/v1/files/session:{upload_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/FileUploadResponse"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "Returns information about an existing upload session",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "upload_id",
|
||||
"in": "path",
|
||||
"description": "ID of the upload session",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/FileResponse"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "upload_id",
|
||||
"in": "path",
|
||||
"description": "ID of the upload session",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/octet-stream": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"format": "binary"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/vector-dbs/{vector_db_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -1826,6 +2109,37 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/files/{bucket}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListFileResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
],
|
||||
"description": "List all files in a bucket.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "bucket",
|
||||
"in": "path",
|
||||
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/models": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2558,67 +2872,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Turn"
|
||||
}
|
||||
},
|
||||
"text/event-stream": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Agents"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "agent_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "session_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "turn_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/SubmitToolResponseMessagesRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/post-training/supervised-fine-tune": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -4791,62 +5044,10 @@
|
|||
"title": "CompletionResponseStreamChunk",
|
||||
"description": "A chunk of a streamed completion response."
|
||||
},
|
||||
"CreateAgentRequest": {
|
||||
"ContinueAgentTurnRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_config": {
|
||||
"$ref": "#/components/schemas/AgentConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"agent_config"
|
||||
],
|
||||
"title": "CreateAgentRequest"
|
||||
},
|
||||
"AgentCreateResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_id": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"agent_id"
|
||||
],
|
||||
"title": "AgentCreateResponse"
|
||||
},
|
||||
"CreateAgentSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"session_name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"session_name"
|
||||
],
|
||||
"title": "CreateAgentSessionRequest"
|
||||
},
|
||||
"AgentSessionCreateResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"session_id": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"session_id"
|
||||
],
|
||||
"title": "AgentSessionCreateResponse"
|
||||
},
|
||||
"CreateAgentTurnRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"messages": {
|
||||
"new_messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
|
@ -4858,61 +5059,13 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"documents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
}
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
]
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"mime_type"
|
||||
],
|
||||
"title": "Document"
|
||||
}
|
||||
},
|
||||
"toolgroups": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AgentTool"
|
||||
}
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"messages"
|
||||
"new_messages"
|
||||
],
|
||||
"title": "CreateAgentTurnRequest"
|
||||
"title": "ContinueAgentTurnRequest"
|
||||
},
|
||||
"InferenceStep": {
|
||||
"type": "object",
|
||||
|
@ -5294,7 +5447,7 @@
|
|||
"$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AgentTurnResponseTurnPendingPayload"
|
||||
"$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
|
@ -5305,7 +5458,7 @@
|
|||
"step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
|
||||
"turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
|
||||
"turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload",
|
||||
"turn_pending": "#/components/schemas/AgentTurnResponseTurnPendingPayload"
|
||||
"turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -5468,6 +5621,25 @@
|
|||
"title": "AgentTurnResponseStreamChunk",
|
||||
"description": "streamed agent turn completion response."
|
||||
},
|
||||
"AgentTurnResponseTurnAwaitingInputPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"event_type": {
|
||||
"type": "string",
|
||||
"const": "turn_awaiting_input",
|
||||
"default": "turn_awaiting_input"
|
||||
},
|
||||
"turn": {
|
||||
"$ref": "#/components/schemas/Turn"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"event_type",
|
||||
"turn"
|
||||
],
|
||||
"title": "AgentTurnResponseTurnAwaitingInputPayload"
|
||||
},
|
||||
"AgentTurnResponseTurnCompletePayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -5487,25 +5659,6 @@
|
|||
],
|
||||
"title": "AgentTurnResponseTurnCompletePayload"
|
||||
},
|
||||
"AgentTurnResponseTurnPendingPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"event_type": {
|
||||
"type": "string",
|
||||
"const": "turn_pending",
|
||||
"default": "turn_pending"
|
||||
},
|
||||
"turn": {
|
||||
"$ref": "#/components/schemas/Turn"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"event_type",
|
||||
"turn"
|
||||
],
|
||||
"title": "AgentTurnResponseTurnPendingPayload"
|
||||
},
|
||||
"AgentTurnResponseTurnStartPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -5525,6 +5678,228 @@
|
|||
],
|
||||
"title": "AgentTurnResponseTurnStartPayload"
|
||||
},
|
||||
"CreateAgentRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_config": {
|
||||
"$ref": "#/components/schemas/AgentConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"agent_config"
|
||||
],
|
||||
"title": "CreateAgentRequest"
|
||||
},
|
||||
"AgentCreateResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"agent_id": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"agent_id"
|
||||
],
|
||||
"title": "AgentCreateResponse"
|
||||
},
|
||||
"CreateAgentSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"session_name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"session_name"
|
||||
],
|
||||
"title": "CreateAgentSessionRequest"
|
||||
},
|
||||
"AgentSessionCreateResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"session_id": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"session_id"
|
||||
],
|
||||
"title": "AgentSessionCreateResponse"
|
||||
},
|
||||
"CreateAgentTurnRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/UserMessage"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"documents": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/InterleavedContentItem"
|
||||
}
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/URL"
|
||||
}
|
||||
]
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"content",
|
||||
"mime_type"
|
||||
],
|
||||
"title": "Document"
|
||||
}
|
||||
},
|
||||
"toolgroups": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AgentTool"
|
||||
}
|
||||
},
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"messages"
|
||||
],
|
||||
"title": "CreateAgentTurnRequest"
|
||||
},
|
||||
"CreateUploadSessionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"bucket": {
|
||||
"type": "string",
|
||||
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
|
||||
},
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string",
|
||||
"description": "MIME type of the file"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "File size in bytes"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"bucket",
|
||||
"key",
|
||||
"mime_type",
|
||||
"size"
|
||||
],
|
||||
"title": "CreateUploadSessionRequest"
|
||||
},
|
||||
"FileUploadResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "ID of the upload session"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "Upload URL for the file or file parts"
|
||||
},
|
||||
"offset": {
|
||||
"type": "integer",
|
||||
"description": "Upload content offset"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Upload content size"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"url",
|
||||
"offset",
|
||||
"size"
|
||||
],
|
||||
"title": "FileUploadResponse",
|
||||
"description": "Response after initiating a file upload session."
|
||||
},
|
||||
"FileResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"bucket": {
|
||||
"type": "string",
|
||||
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
|
||||
},
|
||||
"key": {
|
||||
"type": "string",
|
||||
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
|
||||
},
|
||||
"mime_type": {
|
||||
"type": "string",
|
||||
"description": "MIME type of the file"
|
||||
},
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "Upload URL for the file contents"
|
||||
},
|
||||
"bytes": {
|
||||
"type": "integer",
|
||||
"description": "Size of the file in bytes"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Timestamp of when the file was created"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"bucket",
|
||||
"key",
|
||||
"mime_type",
|
||||
"url",
|
||||
"bytes",
|
||||
"created_at"
|
||||
],
|
||||
"title": "FileResponse",
|
||||
"description": "Response representing a file entry."
|
||||
},
|
||||
"EmbeddingsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -6840,6 +7215,37 @@
|
|||
],
|
||||
"title": "ToolInvocationResult"
|
||||
},
|
||||
"BucketResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name"
|
||||
],
|
||||
"title": "BucketResponse"
|
||||
},
|
||||
"ListBucketResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/BucketResponse"
|
||||
},
|
||||
"description": "List of BucketResponse entries"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "ListBucketResponse",
|
||||
"description": "Response representing a list of bucket entries."
|
||||
},
|
||||
"ListDatasetsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -6856,6 +7262,24 @@
|
|||
],
|
||||
"title": "ListDatasetsResponse"
|
||||
},
|
||||
"ListFileResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/FileResponse"
|
||||
},
|
||||
"description": "List of FileResponse entries"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "ListFileResponse",
|
||||
"description": "Response representing a list of file entries."
|
||||
},
|
||||
"ListModelsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -8323,22 +8747,6 @@
|
|||
],
|
||||
"title": "ScoreBatchResponse"
|
||||
},
|
||||
"SubmitToolResponseMessagesRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tool_response_messages": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"tool_response_messages"
|
||||
],
|
||||
"title": "SubmitToolResponseMessagesRequest"
|
||||
},
|
||||
"AlgorithmConfig": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
@ -8643,6 +9051,9 @@
|
|||
{
|
||||
"name": "Eval"
|
||||
},
|
||||
{
|
||||
"name": "Files (Coming Soon)"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
|
||||
|
@ -8698,6 +9109,7 @@
|
|||
"DatasetIO",
|
||||
"Datasets",
|
||||
"Eval",
|
||||
"Files (Coming Soon)",
|
||||
"Inference",
|
||||
"Inspect",
|
||||
"Models",
|
||||
|
|
551
docs/_static/llama-stack-spec.yaml
vendored
551
docs/_static/llama-stack-spec.yaml
vendored
|
@ -329,6 +329,45 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A single turn in an interaction with an Agentic System. **OR** streamed
|
||||
agent turn completion response.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Turn'
|
||||
text/event-stream:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: session_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: turn_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ContinueAgentTurnRequest'
|
||||
required: true
|
||||
/v1/agents:
|
||||
post:
|
||||
responses:
|
||||
|
@ -406,6 +445,43 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/CreateAgentTurnRequest'
|
||||
required: true
|
||||
/v1/files:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBucketResponse'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: List all buckets.
|
||||
parameters:
|
||||
- name: bucket
|
||||
in: query
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/FileUploadResponse'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: >-
|
||||
Create a new upload session for a file identified by a bucket and key.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateUploadSessionRequest'
|
||||
required: true
|
||||
/v1/agents/{agent_id}:
|
||||
delete:
|
||||
responses:
|
||||
|
@ -468,6 +544,59 @@ paths:
|
|||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/files/{bucket}/{key}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/FileResponse'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: >-
|
||||
Get info for a file identified by a bucket and key.
|
||||
parameters:
|
||||
- name: bucket
|
||||
in: path
|
||||
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: key
|
||||
in: path
|
||||
description: >-
|
||||
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/FileResponse'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: >-
|
||||
Delete a file identified by a bucket and key.
|
||||
parameters:
|
||||
- name: bucket
|
||||
in: path
|
||||
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: key
|
||||
in: path
|
||||
description: >-
|
||||
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/inference/embeddings:
|
||||
post:
|
||||
responses:
|
||||
|
@ -875,6 +1004,57 @@ paths:
|
|||
- PostTraining (Coming Soon)
|
||||
description: ''
|
||||
parameters: []
|
||||
/v1/files/session:{upload_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/FileUploadResponse'
|
||||
- type: 'null'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: >-
|
||||
Returns information about an existing upload session
|
||||
parameters:
|
||||
- name: upload_id
|
||||
in: path
|
||||
description: ID of the upload session
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/FileResponse'
|
||||
- type: 'null'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: >-
|
||||
Upload file content to an existing upload session. On the server, request
|
||||
body will have the raw bytes that are uploaded.
|
||||
parameters:
|
||||
- name: upload_id
|
||||
in: path
|
||||
description: ID of the upload session
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/octet-stream:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
required: true
|
||||
/v1/vector-dbs/{vector_db_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1091,6 +1271,25 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/RegisterDatasetRequest'
|
||||
required: true
|
||||
/v1/files/{bucket}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListFileResponse'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
description: List all files in a bucket.
|
||||
parameters:
|
||||
- name: bucket
|
||||
in: path
|
||||
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/models:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1543,45 +1742,6 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/ScoreBatchRequest'
|
||||
required: true
|
||||
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: >-
|
||||
A single turn in an interaction with an Agentic System. **OR** streamed
|
||||
agent turn completion response.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Turn'
|
||||
text/event-stream:
|
||||
schema:
|
||||
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
|
||||
tags:
|
||||
- Agents
|
||||
description: ''
|
||||
parameters:
|
||||
- name: agent_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: session_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: turn_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SubmitToolResponseMessagesRequest'
|
||||
required: true
|
||||
/v1/post-training/supervised-fine-tune:
|
||||
post:
|
||||
responses:
|
||||
|
@ -3080,83 +3240,19 @@ components:
|
|||
title: CompletionResponseStreamChunk
|
||||
description: >-
|
||||
A chunk of a streamed completion response.
|
||||
CreateAgentRequest:
|
||||
ContinueAgentTurnRequest:
|
||||
type: object
|
||||
properties:
|
||||
agent_config:
|
||||
$ref: '#/components/schemas/AgentConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- agent_config
|
||||
title: CreateAgentRequest
|
||||
AgentCreateResponse:
|
||||
type: object
|
||||
properties:
|
||||
agent_id:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- agent_id
|
||||
title: AgentCreateResponse
|
||||
CreateAgentSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
session_name:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- session_name
|
||||
title: CreateAgentSessionRequest
|
||||
AgentSessionCreateResponse:
|
||||
type: object
|
||||
properties:
|
||||
session_id:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- session_id
|
||||
title: AgentSessionCreateResponse
|
||||
CreateAgentTurnRequest:
|
||||
type: object
|
||||
properties:
|
||||
messages:
|
||||
new_messages:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
stream:
|
||||
type: boolean
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
mime_type:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- mime_type
|
||||
title: Document
|
||||
toolgroups:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AgentTool'
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- messages
|
||||
title: CreateAgentTurnRequest
|
||||
- new_messages
|
||||
title: ContinueAgentTurnRequest
|
||||
InferenceStep:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3405,7 +3501,7 @@ components:
|
|||
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnPendingPayload'
|
||||
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
discriminator:
|
||||
propertyName: event_type
|
||||
mapping:
|
||||
|
@ -3414,7 +3510,7 @@ components:
|
|||
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
|
||||
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
|
||||
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
|
||||
turn_pending: '#/components/schemas/AgentTurnResponseTurnPendingPayload'
|
||||
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
|
||||
AgentTurnResponseStepCompletePayload:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3521,6 +3617,21 @@ components:
|
|||
- event
|
||||
title: AgentTurnResponseStreamChunk
|
||||
description: streamed agent turn completion response.
|
||||
"AgentTurnResponseTurnAwaitingInputPayload":
|
||||
type: object
|
||||
properties:
|
||||
event_type:
|
||||
type: string
|
||||
const: turn_awaiting_input
|
||||
default: turn_awaiting_input
|
||||
turn:
|
||||
$ref: '#/components/schemas/Turn'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- event_type
|
||||
- turn
|
||||
title: >-
|
||||
AgentTurnResponseTurnAwaitingInputPayload
|
||||
AgentTurnResponseTurnCompletePayload:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3535,20 +3646,6 @@ components:
|
|||
- event_type
|
||||
- turn
|
||||
title: AgentTurnResponseTurnCompletePayload
|
||||
AgentTurnResponseTurnPendingPayload:
|
||||
type: object
|
||||
properties:
|
||||
event_type:
|
||||
type: string
|
||||
const: turn_pending
|
||||
default: turn_pending
|
||||
turn:
|
||||
$ref: '#/components/schemas/Turn'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- event_type
|
||||
- turn
|
||||
title: AgentTurnResponseTurnPendingPayload
|
||||
AgentTurnResponseTurnStartPayload:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3563,6 +3660,164 @@ components:
|
|||
- event_type
|
||||
- turn_id
|
||||
title: AgentTurnResponseTurnStartPayload
|
||||
CreateAgentRequest:
|
||||
type: object
|
||||
properties:
|
||||
agent_config:
|
||||
$ref: '#/components/schemas/AgentConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- agent_config
|
||||
title: CreateAgentRequest
|
||||
AgentCreateResponse:
|
||||
type: object
|
||||
properties:
|
||||
agent_id:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- agent_id
|
||||
title: AgentCreateResponse
|
||||
CreateAgentSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
session_name:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- session_name
|
||||
title: CreateAgentSessionRequest
|
||||
AgentSessionCreateResponse:
|
||||
type: object
|
||||
properties:
|
||||
session_id:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- session_id
|
||||
title: AgentSessionCreateResponse
|
||||
CreateAgentTurnRequest:
|
||||
type: object
|
||||
properties:
|
||||
messages:
|
||||
type: array
|
||||
items:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
stream:
|
||||
type: boolean
|
||||
documents:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/InterleavedContentItem'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/InterleavedContentItem'
|
||||
- $ref: '#/components/schemas/URL'
|
||||
mime_type:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- mime_type
|
||||
title: Document
|
||||
toolgroups:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AgentTool'
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- messages
|
||||
title: CreateAgentTurnRequest
|
||||
CreateUploadSessionRequest:
|
||||
type: object
|
||||
properties:
|
||||
bucket:
|
||||
type: string
|
||||
description: >-
|
||||
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
|
||||
key:
|
||||
type: string
|
||||
description: >-
|
||||
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
mime_type:
|
||||
type: string
|
||||
description: MIME type of the file
|
||||
size:
|
||||
type: integer
|
||||
description: File size in bytes
|
||||
additionalProperties: false
|
||||
required:
|
||||
- bucket
|
||||
- key
|
||||
- mime_type
|
||||
- size
|
||||
title: CreateUploadSessionRequest
|
||||
FileUploadResponse:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: ID of the upload session
|
||||
url:
|
||||
type: string
|
||||
description: Upload URL for the file or file parts
|
||||
offset:
|
||||
type: integer
|
||||
description: Upload content offset
|
||||
size:
|
||||
type: integer
|
||||
description: Upload content size
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- url
|
||||
- offset
|
||||
- size
|
||||
title: FileUploadResponse
|
||||
description: >-
|
||||
Response after initiating a file upload session.
|
||||
FileResponse:
|
||||
type: object
|
||||
properties:
|
||||
bucket:
|
||||
type: string
|
||||
description: >-
|
||||
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
|
||||
key:
|
||||
type: string
|
||||
description: >-
|
||||
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
mime_type:
|
||||
type: string
|
||||
description: MIME type of the file
|
||||
url:
|
||||
type: string
|
||||
description: Upload URL for the file contents
|
||||
bytes:
|
||||
type: integer
|
||||
description: Size of the file in bytes
|
||||
created_at:
|
||||
type: integer
|
||||
description: Timestamp of when the file was created
|
||||
additionalProperties: false
|
||||
required:
|
||||
- bucket
|
||||
- key
|
||||
- mime_type
|
||||
- url
|
||||
- bytes
|
||||
- created_at
|
||||
title: FileResponse
|
||||
description: Response representing a file entry.
|
||||
EmbeddingsRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4394,6 +4649,29 @@ components:
|
|||
required:
|
||||
- content
|
||||
title: ToolInvocationResult
|
||||
BucketResponse:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
title: BucketResponse
|
||||
ListBucketResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/BucketResponse'
|
||||
description: List of BucketResponse entries
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: ListBucketResponse
|
||||
description: >-
|
||||
Response representing a list of bucket entries.
|
||||
ListDatasetsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4405,6 +4683,20 @@ components:
|
|||
required:
|
||||
- data
|
||||
title: ListDatasetsResponse
|
||||
ListFileResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/FileResponse'
|
||||
description: List of FileResponse entries
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: ListFileResponse
|
||||
description: >-
|
||||
Response representing a list of file entries.
|
||||
ListModelsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5328,17 +5620,6 @@ components:
|
|||
required:
|
||||
- results
|
||||
title: ScoreBatchResponse
|
||||
SubmitToolResponseMessagesRequest:
|
||||
type: object
|
||||
properties:
|
||||
tool_response_messages:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolResponseMessage'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- tool_response_messages
|
||||
title: SubmitToolResponseMessagesRequest
|
||||
AlgorithmConfig:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||
|
@ -5533,6 +5814,7 @@ tags:
|
|||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
- name: Eval
|
||||
- name: Files (Coming Soon)
|
||||
- name: Inference
|
||||
description: >-
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
|
@ -5567,6 +5849,7 @@ x-tagGroups:
|
|||
- DatasetIO
|
||||
- Datasets
|
||||
- Eval
|
||||
- Files (Coming Soon)
|
||||
- Inference
|
||||
- Inspect
|
||||
- Models
|
||||
|
|
|
@ -477,6 +477,7 @@ class Generator:
|
|||
"SyntheticDataGeneration",
|
||||
"PostTraining",
|
||||
"BatchInference",
|
||||
"Files",
|
||||
]:
|
||||
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
|
||||
print(op.defining_class.__name__)
|
||||
|
@ -520,8 +521,30 @@ class Generator:
|
|||
# parameters passed anywhere
|
||||
parameters = path_parameters + query_parameters
|
||||
|
||||
# data passed in payload
|
||||
if op.request_params:
|
||||
webmethod = getattr(op.func_ref, "__webmethod__", None)
|
||||
raw_bytes_request_body = False
|
||||
if webmethod:
|
||||
raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False)
|
||||
|
||||
# data passed in request body as raw bytes cannot have request parameters
|
||||
if raw_bytes_request_body and op.request_params:
|
||||
raise ValueError("Cannot have both raw bytes request body and request parameters")
|
||||
|
||||
# data passed in request body as raw bytes
|
||||
if raw_bytes_request_body:
|
||||
requestBody = RequestBody(
|
||||
content={
|
||||
"application/octet-stream": {
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"format": "binary",
|
||||
}
|
||||
}
|
||||
},
|
||||
required=True,
|
||||
)
|
||||
# data passed in payload as JSON and mapped to request parameters
|
||||
elif op.request_params:
|
||||
builder = ContentBuilder(self.schema_builder)
|
||||
first = next(iter(op.request_params))
|
||||
request_name, request_type = first
|
||||
|
|
|
@ -78,7 +78,7 @@ class MediaType:
|
|||
|
||||
@dataclass
|
||||
class RequestBody:
|
||||
content: Dict[str, MediaType]
|
||||
content: Dict[str, MediaType | Dict[str, Any]]
|
||||
description: Optional[str] = None
|
||||
required: Optional[bool] = None
|
||||
|
||||
|
|
|
@ -194,7 +194,7 @@ class AgentTurnResponseEventType(Enum):
|
|||
|
||||
turn_start = "turn_start"
|
||||
turn_complete = "turn_complete"
|
||||
turn_pending = "turn_pending"
|
||||
turn_awaiting_input = "turn_awaiting_input"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
@ -237,8 +237,10 @@ class AgentTurnResponseTurnCompletePayload(BaseModel):
|
|||
|
||||
|
||||
@json_schema_type
|
||||
class AgentTurnResponseTurnPendingPayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_pending.value] = AgentTurnResponseEventType.turn_pending.value
|
||||
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
|
||||
event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = (
|
||||
AgentTurnResponseEventType.turn_awaiting_input.value
|
||||
)
|
||||
turn: Turn
|
||||
|
||||
|
||||
|
@ -250,7 +252,7 @@ AgentTurnResponseEventPayload = register_schema(
|
|||
AgentTurnResponseStepCompletePayload,
|
||||
AgentTurnResponseTurnStartPayload,
|
||||
AgentTurnResponseTurnCompletePayload,
|
||||
AgentTurnResponseTurnPendingPayload,
|
||||
AgentTurnResponseTurnAwaitingInputPayload,
|
||||
],
|
||||
Field(discriminator="event_type"),
|
||||
],
|
||||
|
@ -344,15 +346,20 @@ class Agents(Protocol):
|
|||
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
|
||||
|
||||
@webmethod(
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/tool_responses",
|
||||
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue",
|
||||
method="POST",
|
||||
)
|
||||
async def submit_tool_responses(
|
||||
async def continue_agent_turn(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
tool_responses: Dict[str, ToolResponseMessage],
|
||||
new_messages: List[
|
||||
Union[
|
||||
UserMessage,
|
||||
ToolResponseMessage,
|
||||
]
|
||||
],
|
||||
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
|
||||
|
||||
@webmethod(
|
||||
|
|
7
llama_stack/apis/files/__init__.py
Normal file
7
llama_stack/apis/files/__init__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .files import * # noqa: F401 F403
|
174
llama_stack/apis/files/files.py
Normal file
174
llama_stack/apis/files/files.py
Normal file
|
@ -0,0 +1,174 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import List, Optional, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class FileUploadResponse(BaseModel):
|
||||
"""
|
||||
Response after initiating a file upload session.
|
||||
|
||||
:param id: ID of the upload session
|
||||
:param url: Upload URL for the file or file parts
|
||||
:param offset: Upload content offset
|
||||
:param size: Upload content size
|
||||
"""
|
||||
|
||||
id: str
|
||||
url: str
|
||||
offset: int
|
||||
size: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BucketResponse(BaseModel):
|
||||
name: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListBucketResponse(BaseModel):
|
||||
"""
|
||||
Response representing a list of bucket entries.
|
||||
|
||||
:param data: List of BucketResponse entries
|
||||
"""
|
||||
|
||||
data: List[BucketResponse]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class FileResponse(BaseModel):
|
||||
"""
|
||||
Response representing a file entry.
|
||||
|
||||
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
|
||||
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
:param mime_type: MIME type of the file
|
||||
:param url: Upload URL for the file contents
|
||||
:param bytes: Size of the file in bytes
|
||||
:param created_at: Timestamp of when the file was created
|
||||
"""
|
||||
|
||||
bucket: str
|
||||
key: str
|
||||
mime_type: str
|
||||
url: str
|
||||
bytes: int
|
||||
created_at: int
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class ListFileResponse(BaseModel):
|
||||
"""
|
||||
Response representing a list of file entries.
|
||||
|
||||
:param data: List of FileResponse entries
|
||||
"""
|
||||
|
||||
data: List[FileResponse]
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
@trace_protocol
|
||||
class Files(Protocol):
|
||||
@webmethod(route="/files", method="POST")
|
||||
async def create_upload_session(
|
||||
self,
|
||||
bucket: str,
|
||||
key: str,
|
||||
mime_type: str,
|
||||
size: int,
|
||||
) -> FileUploadResponse:
|
||||
"""
|
||||
Create a new upload session for a file identified by a bucket and key.
|
||||
|
||||
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
|
||||
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
:param mime_type: MIME type of the file
|
||||
:param size: File size in bytes
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
|
||||
async def upload_content_to_session(
|
||||
self,
|
||||
upload_id: str,
|
||||
) -> Optional[FileResponse]:
|
||||
"""
|
||||
Upload file content to an existing upload session.
|
||||
On the server, request body will have the raw bytes that are uploaded.
|
||||
|
||||
:param upload_id: ID of the upload session
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/session:{upload_id}", method="GET")
|
||||
async def get_upload_session_info(
|
||||
self,
|
||||
upload_id: str,
|
||||
) -> Optional[FileUploadResponse]:
|
||||
"""
|
||||
Returns information about an existing upload session.
|
||||
|
||||
:param upload_id: ID of the upload session
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files", method="GET")
|
||||
async def list_all_buckets(
|
||||
self,
|
||||
bucket: str,
|
||||
) -> ListBucketResponse:
|
||||
"""
|
||||
List all buckets.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{bucket}", method="GET")
|
||||
async def list_files_in_bucket(
|
||||
self,
|
||||
bucket: str,
|
||||
) -> ListFileResponse:
|
||||
"""
|
||||
List all files in a bucket.
|
||||
|
||||
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{bucket}/{key:path}", method="GET")
|
||||
async def get_file(
|
||||
self,
|
||||
bucket: str,
|
||||
key: str,
|
||||
) -> FileResponse:
|
||||
"""
|
||||
Get information about a file identified by a bucket and key.
|
||||
|
||||
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
|
||||
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
|
||||
async def delete_file(
|
||||
self,
|
||||
bucket: str,
|
||||
key: str,
|
||||
) -> FileResponse:
|
||||
"""
|
||||
Delete a file identified by a bucket and key.
|
||||
|
||||
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
|
||||
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
|
||||
"""
|
||||
...
|
|
@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks
|
|||
from llama_stack.apis.datasetio import DatasetIO
|
||||
from llama_stack.apis.datasets import Datasets
|
||||
from llama_stack.apis.eval import Eval
|
||||
from llama_stack.apis.files import Files
|
||||
from llama_stack.apis.inference import Inference
|
||||
from llama_stack.apis.inspect import Inspect
|
||||
from llama_stack.apis.models import Models
|
||||
|
@ -63,6 +64,7 @@ class LlamaStack(
|
|||
ToolGroups,
|
||||
ToolRuntime,
|
||||
RAGToolRuntime,
|
||||
Files,
|
||||
):
|
||||
pass
|
||||
|
||||
|
|
|
@ -169,12 +169,17 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
async for event in agent.create_and_execute_turn(request):
|
||||
yield event
|
||||
|
||||
async def submit_tool_response_messages(
|
||||
async def continue_agent_turn(
|
||||
self,
|
||||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
tool_response_messages: List[ToolResponseMessage],
|
||||
new_messages: List[
|
||||
Union[
|
||||
UserMessage,
|
||||
ToolResponseMessage,
|
||||
]
|
||||
],
|
||||
) -> AsyncGenerator:
|
||||
pass
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
|
|||
)
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
augment_content_with_response_format_prompt,
|
||||
|
@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
|
|||
|
||||
self.model_registry_helper = ModelRegistryHelper(
|
||||
[
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
llama_model.descriptor(),
|
||||
llama_model.core_model_id.value,
|
||||
)
|
||||
|
|
|
@ -6,19 +6,19 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta.llama3-1-8b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta.llama3-1-70b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta.llama3-1-405b-instruct-v1:0",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
|
|
|
@ -6,15 +6,15 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
model_aliases = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.1-8b",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama-3.3-70b",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
|
|
|
@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
|
|||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
get_sampling_options,
|
||||
|
@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
from .config import DatabricksImplConfig
|
||||
|
||||
model_aliases = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"databricks-meta-llama-3-1-70b-instruct",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"databricks-meta-llama-3-1-405b-instruct",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
|
|
|
@ -6,47 +6,47 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p1-70b-instruct",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p1-405b-instruct",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p2-1b-instruct",
|
||||
CoreModelId.llama3_2_1b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p2-3b-instruct",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-v3p3-70b-instruct",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-guard-3-8b",
|
||||
CoreModelId.llama_guard_3_8b.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"accounts/fireworks/models/llama-guard-3-11b-vision",
|
||||
CoreModelId.llama_guard_3_11b_vision.value,
|
||||
),
|
||||
|
|
|
@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
|
|||
from llama_stack.providers.remote.inference.groq.config import GroqConfig
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_alias,
|
||||
build_model_alias,
|
||||
build_model_alias_with_just_provider_model_id,
|
||||
)
|
||||
|
||||
from .groq_utils import (
|
||||
|
@ -42,19 +42,19 @@ from .groq_utils import (
|
|||
)
|
||||
|
||||
_MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3-8b-8192",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama-3.1-8b-instant",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3-70b-8192",
|
||||
CoreModelId.llama3_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama-3.3-70b-versatile",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
|
@ -62,7 +62,7 @@ _MODEL_ALIASES = [
|
|||
# Preview models aren't recommended for production use, but we include this one
|
||||
# to pass the test fixture
|
||||
# TODO(aidand): Replace this with a stable model once Groq supports it
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama-3.2-3b-preview",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
|
|
|
@ -6,43 +6,43 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
_MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama3-8b-instruct",
|
||||
CoreModelId.llama3_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama3-70b-instruct",
|
||||
CoreModelId.llama3_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.1-8b-instruct",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.1-70b-instruct",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.1-405b-instruct",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.2-1b-instruct",
|
||||
CoreModelId.llama3_2_1b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.2-3b-instruct",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.2-11b-vision-instruct",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta/llama-3.2-90b-vision-instruct",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
|
|
|
@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
|
|||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_hf_repo_model_alias,
|
||||
build_model_alias,
|
||||
build_model_alias_with_just_provider_model_id,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
OpenAICompatCompletionChoice,
|
||||
|
@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
model_aliases = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.1:8b-instruct-fp16",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.1:8b",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.1:70b-instruct-fp16",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.1:70b",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.1:405b-instruct-fp16",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.1:405b",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.2:1b-instruct-fp16",
|
||||
CoreModelId.llama3_2_1b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.2:1b",
|
||||
CoreModelId.llama3_2_1b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.2:3b-instruct-fp16",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.2:3b",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.2-vision:11b-instruct-fp16",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.2-vision:latest",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.2-vision:90b-instruct-fp16",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias_with_just_provider_model_id(
|
||||
build_model_alias(
|
||||
"llama3.2-vision:90b",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama3.3:70b",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
# The Llama Guard models don't have their full fp16 versions
|
||||
# so we are going to alias their default version to the canonical SKU
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama-guard3:8b",
|
||||
CoreModelId.llama_guard_3_8b.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"llama-guard3:1b",
|
||||
CoreModelId.llama_guard_3_1b.value,
|
||||
),
|
||||
|
|
|
@ -6,43 +6,43 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.1-8B-Instruct",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.1-70B-Instruct",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.1-405B-Instruct",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.2-1B-Instruct",
|
||||
CoreModelId.llama3_2_1b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.2-3B-Instruct",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-3.3-70B-Instruct",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Llama-3.2-11B-Vision-Instruct",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Llama-3.2-90B-Vision-Instruct",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"Meta-Llama-Guard-3-8B",
|
||||
CoreModelId.llama_guard_3_8b.value,
|
||||
),
|
||||
|
|
|
@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
|
|||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
OpenAICompatCompletionChoice,
|
||||
|
@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_model_aliases():
|
||||
def build_hf_repo_model_aliases():
|
||||
return [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
model.huggingface_repo,
|
||||
model.descriptor(),
|
||||
)
|
||||
|
@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
|
|||
model_id: str
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.register_helper = ModelRegistryHelper(build_model_aliases())
|
||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
|
||||
self.huggingface_repo_to_llama_model_id = {
|
||||
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
|
||||
}
|
||||
|
|
|
@ -6,43 +6,43 @@
|
|||
|
||||
from llama_stack.models.llama.datatypes import CoreModelId
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
|
||||
MODEL_ALIASES = [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_8b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
||||
CoreModelId.llama3_1_405b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_3b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_11b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
|
||||
CoreModelId.llama3_2_90b_vision_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
|
||||
CoreModelId.llama3_3_70b_instruct.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Meta-Llama-Guard-3-8B",
|
||||
CoreModelId.llama_guard_3_8b.value,
|
||||
),
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
|
||||
CoreModelId.llama_guard_3_11b_vision.value,
|
||||
),
|
||||
|
|
|
@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
|
|||
from llama_stack.providers.datatypes import ModelsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ModelRegistryHelper,
|
||||
build_model_alias,
|
||||
build_hf_repo_model_alias,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_compat import (
|
||||
OpenAICompatCompletionResponse,
|
||||
|
@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_model_aliases():
|
||||
def build_hf_repo_model_aliases():
|
||||
return [
|
||||
build_model_alias(
|
||||
build_hf_repo_model_alias(
|
||||
model.huggingface_repo,
|
||||
model.descriptor(),
|
||||
)
|
||||
|
@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
|
|||
|
||||
class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
||||
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
|
||||
self.register_helper = ModelRegistryHelper(build_model_aliases())
|
||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
|
||||
self.config = config
|
||||
self.client = None
|
||||
|
||||
|
|
|
@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
|
|||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def inference_ollama(inference_model) -> ProviderFixture:
|
||||
inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
|
||||
if inference_model and "Llama3.1-8B-Instruct" in inference_model:
|
||||
pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
|
||||
|
||||
def inference_ollama() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
providers=[
|
||||
Provider(
|
||||
provider_id="ollama",
|
||||
provider_type="remote::ollama",
|
||||
config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(),
|
||||
config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
|
||||
)
|
||||
],
|
||||
)
|
||||
|
|
|
@ -4,9 +4,10 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from collections import namedtuple
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.models.llama.sku_list import all_registered_models
|
||||
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
|
||||
|
@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
|
|||
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
|
||||
)
|
||||
|
||||
ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
|
||||
|
||||
# TODO: this class is more confusing than useful right now. We need to make it
|
||||
# closer to the Model class.
|
||||
class ModelAlias(BaseModel):
|
||||
provider_model_id: str
|
||||
aliases: List[str] = Field(default_factory=list)
|
||||
llama_model: Optional[str] = None
|
||||
model_type: ModelType = ModelType.llm
|
||||
|
||||
|
||||
def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
|
||||
|
@ -24,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
|
||||
def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
|
||||
return ModelAlias(
|
||||
provider_model_id=provider_model_id,
|
||||
aliases=[
|
||||
|
@ -34,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
|
|||
)
|
||||
|
||||
|
||||
def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias:
|
||||
def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
|
||||
return ModelAlias(
|
||||
provider_model_id=provider_model_id,
|
||||
aliases=[],
|
||||
|
|
|
@ -19,6 +19,7 @@ class WebMethod:
|
|||
request_examples: Optional[List[Any]] = None
|
||||
response_examples: Optional[List[Any]] = None
|
||||
method: Optional[str] = None
|
||||
raw_bytes_request_body: Optional[bool] = False
|
||||
|
||||
|
||||
def webmethod(
|
||||
|
@ -27,6 +28,7 @@ def webmethod(
|
|||
public: Optional[bool] = False,
|
||||
request_examples: Optional[List[Any]] = None,
|
||||
response_examples: Optional[List[Any]] = None,
|
||||
raw_bytes_request_body: Optional[bool] = False,
|
||||
) -> Callable[[T], T]:
|
||||
"""
|
||||
Decorator that supplies additional metadata to an endpoint operation function.
|
||||
|
@ -44,6 +46,7 @@ def webmethod(
|
|||
public=public or False,
|
||||
request_examples=request_examples,
|
||||
response_examples=response_examples,
|
||||
raw_bytes_request_body=raw_bytes_request_body,
|
||||
)
|
||||
return cls
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue