Xi Yan 2025-02-20 14:04:25 -08:00
commit 7676756778
24 changed files with 1363 additions and 443 deletions


@ -554,6 +554,67 @@
}
}
},
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue": {
"post": {
"responses": {
"200": {
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Turn"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
}
}
}
},
"tags": [
"Agents"
],
"description": "",
"parameters": [
{
"name": "agent_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "turn_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ContinueAgentTurnRequest"
}
}
},
"required": true
}
}
},
"/v1/agents": {
"post": {
"responses": {
@ -678,6 +739,65 @@
}
}
},
"/v1/files": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBucketResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "List all buckets.",
"parameters": [
{
"name": "bucket",
"in": "query",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileUploadResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Create a new upload session for a file identified by a bucket and key.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateUploadSessionRequest"
}
}
},
"required": true
}
}
},
"/v1/agents/{agent_id}": {
"delete": {
"responses": {
@ -779,6 +899,84 @@
]
}
},
"/v1/files/{bucket}/{key}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Get a file info identified by a bucket and key.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "key",
"in": "path",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Delete a file identified by a bucket and key.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "key",
"in": "path",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/inference/embeddings": {
"post": {
"responses": {
@ -1470,6 +1668,91 @@
"parameters": []
}
},
"/v1/files/session:{upload_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/FileUploadResponse"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Returns information about an existsing upload session",
"parameters": [
{
"name": "upload_id",
"in": "path",
"description": "ID of the upload session",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/FileResponse"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.",
"parameters": [
{
"name": "upload_id",
"in": "path",
"description": "ID of the upload session",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/octet-stream": {
"schema": {
"type": "string",
"format": "binary"
}
}
},
"required": true
}
}
},
"/v1/vector-dbs/{vector_db_id}": {
"get": {
"responses": {
@ -1826,6 +2109,37 @@
}
}
},
"/v1/files/{bucket}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListFileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "List all files in a bucket.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/models": {
"get": {
"responses": {
@ -2558,67 +2872,6 @@
}
}
},
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages": {
"post": {
"responses": {
"200": {
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Turn"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
}
}
}
},
"tags": [
"Agents"
],
"description": "",
"parameters": [
{
"name": "agent_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "turn_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SubmitToolResponseMessagesRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": {
"post": {
"responses": {
@ -4791,62 +5044,10 @@
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
"CreateAgentRequest": {
"ContinueAgentTurnRequest": {
"type": "object",
"properties": {
"agent_config": {
"$ref": "#/components/schemas/AgentConfig"
}
},
"additionalProperties": false,
"required": [
"agent_config"
],
"title": "CreateAgentRequest"
},
"AgentCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
],
"title": "AgentCreateResponse"
},
"CreateAgentSessionRequest": {
"type": "object",
"properties": {
"session_name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_name"
],
"title": "CreateAgentSessionRequest"
},
"AgentSessionCreateResponse": {
"type": "object",
"properties": {
"session_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_id"
],
"title": "AgentSessionCreateResponse"
},
"CreateAgentTurnRequest": {
"type": "object",
"properties": {
"messages": {
"new_messages": {
"type": "array",
"items": {
"oneOf": [
@ -4858,61 +5059,13 @@
}
]
}
},
"stream": {
"type": "boolean"
},
"documents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
}
},
{
"$ref": "#/components/schemas/URL"
}
]
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"mime_type"
],
"title": "Document"
}
},
"toolgroups": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
}
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
}
},
"additionalProperties": false,
"required": [
"messages"
"new_messages"
],
"title": "CreateAgentTurnRequest"
"title": "ContinueAgentTurnRequest"
},
"InferenceStep": {
"type": "object",
@ -5294,7 +5447,7 @@
"$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
},
{
"$ref": "#/components/schemas/AgentTurnResponseTurnPendingPayload"
"$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
}
],
"discriminator": {
@ -5305,7 +5458,7 @@
"step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
"turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
"turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload",
"turn_pending": "#/components/schemas/AgentTurnResponseTurnPendingPayload"
"turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
}
}
},
@ -5468,6 +5621,25 @@
"title": "AgentTurnResponseStreamChunk",
"description": "streamed agent turn completion response."
},
"AgentTurnResponseTurnAwaitingInputPayload": {
"type": "object",
"properties": {
"event_type": {
"type": "string",
"const": "turn_awaiting_input",
"default": "turn_awaiting_input"
},
"turn": {
"$ref": "#/components/schemas/Turn"
}
},
"additionalProperties": false,
"required": [
"event_type",
"turn"
],
"title": "AgentTurnResponseTurnAwaitingInputPayload"
},
"AgentTurnResponseTurnCompletePayload": {
"type": "object",
"properties": {
@ -5487,25 +5659,6 @@
],
"title": "AgentTurnResponseTurnCompletePayload"
},
"AgentTurnResponseTurnPendingPayload": {
"type": "object",
"properties": {
"event_type": {
"type": "string",
"const": "turn_pending",
"default": "turn_pending"
},
"turn": {
"$ref": "#/components/schemas/Turn"
}
},
"additionalProperties": false,
"required": [
"event_type",
"turn"
],
"title": "AgentTurnResponseTurnPendingPayload"
},
"AgentTurnResponseTurnStartPayload": {
"type": "object",
"properties": {
@ -5525,6 +5678,228 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
"CreateAgentRequest": {
"type": "object",
"properties": {
"agent_config": {
"$ref": "#/components/schemas/AgentConfig"
}
},
"additionalProperties": false,
"required": [
"agent_config"
],
"title": "CreateAgentRequest"
},
"AgentCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
],
"title": "AgentCreateResponse"
},
"CreateAgentSessionRequest": {
"type": "object",
"properties": {
"session_name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_name"
],
"title": "CreateAgentSessionRequest"
},
"AgentSessionCreateResponse": {
"type": "object",
"properties": {
"session_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_id"
],
"title": "AgentSessionCreateResponse"
},
"CreateAgentTurnRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/ToolResponseMessage"
}
]
}
},
"stream": {
"type": "boolean"
},
"documents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
}
},
{
"$ref": "#/components/schemas/URL"
}
]
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"mime_type"
],
"title": "Document"
}
},
"toolgroups": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
}
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
}
},
"additionalProperties": false,
"required": [
"messages"
],
"title": "CreateAgentTurnRequest"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {
"bucket": {
"type": "string",
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
},
"key": {
"type": "string",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
},
"mime_type": {
"type": "string",
"description": "MIME type of the file"
},
"size": {
"type": "integer",
"description": "File size in bytes"
}
},
"additionalProperties": false,
"required": [
"bucket",
"key",
"mime_type",
"size"
],
"title": "CreateUploadSessionRequest"
},
"FileUploadResponse": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "ID of the upload session"
},
"url": {
"type": "string",
"description": "Upload URL for the file or file parts"
},
"offset": {
"type": "integer",
"description": "Upload content offset"
},
"size": {
"type": "integer",
"description": "Upload content size"
}
},
"additionalProperties": false,
"required": [
"id",
"url",
"offset",
"size"
],
"title": "FileUploadResponse",
"description": "Response after initiating a file upload session."
},
"FileResponse": {
"type": "object",
"properties": {
"bucket": {
"type": "string",
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
},
"key": {
"type": "string",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
},
"mime_type": {
"type": "string",
"description": "MIME type of the file"
},
"url": {
"type": "string",
"description": "Upload URL for the file contents"
},
"bytes": {
"type": "integer",
"description": "Size of the file in bytes"
},
"created_at": {
"type": "integer",
"description": "Timestamp of when the file was created"
}
},
"additionalProperties": false,
"required": [
"bucket",
"key",
"mime_type",
"url",
"bytes",
"created_at"
],
"title": "FileResponse",
"description": "Response representing a file entry."
},
"EmbeddingsRequest": {
"type": "object",
"properties": {
@ -6840,6 +7215,37 @@
],
"title": "ToolInvocationResult"
},
"BucketResponse": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"name"
],
"title": "BucketResponse"
},
"ListBucketResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/BucketResponse"
},
"description": "List of FileResponse entries"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListBucketResponse",
"description": "Response representing a list of file entries."
},
"ListDatasetsResponse": {
"type": "object",
"properties": {
@ -6856,6 +7262,24 @@
],
"title": "ListDatasetsResponse"
},
"ListFileResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/FileResponse"
},
"description": "List of FileResponse entries"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListFileResponse",
"description": "Response representing a list of file entries."
},
"ListModelsResponse": {
"type": "object",
"properties": {
@ -8323,22 +8747,6 @@
],
"title": "ScoreBatchResponse"
},
"SubmitToolResponseMessagesRequest": {
"type": "object",
"properties": {
"tool_response_messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolResponseMessage"
}
}
},
"additionalProperties": false,
"required": [
"tool_response_messages"
],
"title": "SubmitToolResponseMessagesRequest"
},
"AlgorithmConfig": {
"oneOf": [
{
@ -8643,6 +9051,9 @@
{
"name": "Eval"
},
{
"name": "Files (Coming Soon)"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@ -8698,6 +9109,7 @@
"DatasetIO",
"Datasets",
"Eval",
"Files (Coming Soon)",
"Inference",
"Inspect",
"Models",


@ -329,6 +329,45 @@ paths:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue:
post:
responses:
'200':
description: >-
A single turn in an interaction with an Agentic System. **OR** streamed
agent turn completion response.
content:
application/json:
schema:
$ref: '#/components/schemas/Turn'
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
tags:
- Agents
description: ''
parameters:
- name: agent_id
in: path
required: true
schema:
type: string
- name: session_id
in: path
required: true
schema:
type: string
- name: turn_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ContinueAgentTurnRequest'
required: true
/v1/agents:
post:
responses:
@ -406,6 +445,43 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
/v1/files:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListBucketResponse'
tags:
- Files (Coming Soon)
description: List all buckets.
parameters:
- name: bucket
in: query
required: true
schema:
type: string
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileUploadResponse'
tags:
- Files (Coming Soon)
description: >-
Create a new upload session for a file identified by a bucket and key.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateUploadSessionRequest'
required: true
/v1/agents/{agent_id}:
delete:
responses:
@ -468,6 +544,59 @@ paths:
required: true
schema:
type: string
/v1/files/{bucket}/{key}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
tags:
- Files (Coming Soon)
description: >-
Get file info identified by a bucket and key.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
- name: key
in: path
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
tags:
- Files (Coming Soon)
description: >-
Delete a file identified by a bucket and key.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
- name: key
in: path
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
required: true
schema:
type: string
/v1/inference/embeddings:
post:
responses:
@ -875,6 +1004,57 @@ paths:
- PostTraining (Coming Soon)
description: ''
parameters: []
/v1/files/session:{upload_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/FileUploadResponse'
- type: 'null'
tags:
- Files (Coming Soon)
description: >-
Returns information about an existing upload session
parameters:
- name: upload_id
in: path
description: ID of the upload session
required: true
schema:
type: string
post:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/FileResponse'
- type: 'null'
tags:
- Files (Coming Soon)
description: >-
Upload file content to an existing upload session. On the server, the request
body will contain the raw bytes that are uploaded.
parameters:
- name: upload_id
in: path
description: ID of the upload session
required: true
schema:
type: string
requestBody:
content:
application/octet-stream:
schema:
type: string
format: binary
required: true
/v1/vector-dbs/{vector_db_id}:
get:
responses:
@ -1091,6 +1271,25 @@ paths:
schema:
$ref: '#/components/schemas/RegisterDatasetRequest'
required: true
/v1/files/{bucket}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListFileResponse'
tags:
- Files (Coming Soon)
description: List all files in a bucket.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
/v1/models:
get:
responses:
@ -1543,45 +1742,6 @@ paths:
schema:
$ref: '#/components/schemas/ScoreBatchRequest'
required: true
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages:
post:
responses:
'200':
description: >-
A single turn in an interaction with an Agentic System. **OR** streamed
agent turn completion response.
content:
application/json:
schema:
$ref: '#/components/schemas/Turn'
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
tags:
- Agents
description: ''
parameters:
- name: agent_id
in: path
required: true
schema:
type: string
- name: session_id
in: path
required: true
schema:
type: string
- name: turn_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SubmitToolResponseMessagesRequest'
required: true
/v1/post-training/supervised-fine-tune:
post:
responses:
@ -3080,83 +3240,19 @@ components:
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
CreateAgentRequest:
ContinueAgentTurnRequest:
type: object
properties:
agent_config:
$ref: '#/components/schemas/AgentConfig'
additionalProperties: false
required:
- agent_config
title: CreateAgentRequest
AgentCreateResponse:
type: object
properties:
agent_id:
type: string
additionalProperties: false
required:
- agent_id
title: AgentCreateResponse
CreateAgentSessionRequest:
type: object
properties:
session_name:
type: string
additionalProperties: false
required:
- session_name
title: CreateAgentSessionRequest
AgentSessionCreateResponse:
type: object
properties:
session_id:
type: string
additionalProperties: false
required:
- session_id
title: AgentSessionCreateResponse
CreateAgentTurnRequest:
type: object
properties:
messages:
new_messages:
type: array
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
stream:
type: boolean
documents:
type: array
items:
type: object
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
mime_type:
type: string
additionalProperties: false
required:
- content
- mime_type
title: Document
toolgroups:
type: array
items:
$ref: '#/components/schemas/AgentTool'
tool_config:
$ref: '#/components/schemas/ToolConfig'
additionalProperties: false
required:
- messages
title: CreateAgentTurnRequest
- new_messages
title: ContinueAgentTurnRequest
InferenceStep:
type: object
properties:
@ -3405,7 +3501,7 @@ components:
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnPendingPayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
discriminator:
propertyName: event_type
mapping:
@ -3414,7 +3510,7 @@ components:
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
turn_pending: '#/components/schemas/AgentTurnResponseTurnPendingPayload'
turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
AgentTurnResponseStepCompletePayload:
type: object
properties:
@ -3521,6 +3617,21 @@ components:
- event
title: AgentTurnResponseStreamChunk
description: streamed agent turn completion response.
"AgentTurnResponseTurnAwaitingInputPayload":
type: object
properties:
event_type:
type: string
const: turn_awaiting_input
default: turn_awaiting_input
turn:
$ref: '#/components/schemas/Turn'
additionalProperties: false
required:
- event_type
- turn
title: >-
AgentTurnResponseTurnAwaitingInputPayload
AgentTurnResponseTurnCompletePayload:
type: object
properties:
@ -3535,20 +3646,6 @@ components:
- event_type
- turn
title: AgentTurnResponseTurnCompletePayload
AgentTurnResponseTurnPendingPayload:
type: object
properties:
event_type:
type: string
const: turn_pending
default: turn_pending
turn:
$ref: '#/components/schemas/Turn'
additionalProperties: false
required:
- event_type
- turn
title: AgentTurnResponseTurnPendingPayload
AgentTurnResponseTurnStartPayload:
type: object
properties:
@ -3563,6 +3660,164 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
CreateAgentRequest:
type: object
properties:
agent_config:
$ref: '#/components/schemas/AgentConfig'
additionalProperties: false
required:
- agent_config
title: CreateAgentRequest
AgentCreateResponse:
type: object
properties:
agent_id:
type: string
additionalProperties: false
required:
- agent_id
title: AgentCreateResponse
CreateAgentSessionRequest:
type: object
properties:
session_name:
type: string
additionalProperties: false
required:
- session_name
title: CreateAgentSessionRequest
AgentSessionCreateResponse:
type: object
properties:
session_id:
type: string
additionalProperties: false
required:
- session_id
title: AgentSessionCreateResponse
CreateAgentTurnRequest:
type: object
properties:
messages:
type: array
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
stream:
type: boolean
documents:
type: array
items:
type: object
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
mime_type:
type: string
additionalProperties: false
required:
- content
- mime_type
title: Document
toolgroups:
type: array
items:
$ref: '#/components/schemas/AgentTool'
tool_config:
$ref: '#/components/schemas/ToolConfig'
additionalProperties: false
required:
- messages
title: CreateAgentTurnRequest
CreateUploadSessionRequest:
type: object
properties:
bucket:
type: string
description: >-
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
key:
type: string
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
mime_type:
type: string
description: MIME type of the file
size:
type: integer
description: File size in bytes
additionalProperties: false
required:
- bucket
- key
- mime_type
- size
title: CreateUploadSessionRequest
FileUploadResponse:
type: object
properties:
id:
type: string
description: ID of the upload session
url:
type: string
description: Upload URL for the file or file parts
offset:
type: integer
description: Upload content offset
size:
type: integer
description: Upload content size
additionalProperties: false
required:
- id
- url
- offset
- size
title: FileUploadResponse
description: >-
Response after initiating a file upload session.
FileResponse:
type: object
properties:
bucket:
type: string
description: >-
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
key:
type: string
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
mime_type:
type: string
description: MIME type of the file
url:
type: string
description: Upload URL for the file contents
bytes:
type: integer
description: Size of the file in bytes
created_at:
type: integer
description: Timestamp of when the file was created
additionalProperties: false
required:
- bucket
- key
- mime_type
- url
- bytes
- created_at
title: FileResponse
description: Response representing a file entry.
EmbeddingsRequest:
type: object
properties:
@ -4394,6 +4649,29 @@ components:
required:
- content
title: ToolInvocationResult
BucketResponse:
type: object
properties:
name:
type: string
additionalProperties: false
required:
- name
title: BucketResponse
ListBucketResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/BucketResponse'
description: List of BucketResponse entries
additionalProperties: false
required:
- data
title: ListBucketResponse
description: >-
Response representing a list of bucket entries.
ListDatasetsResponse:
type: object
properties:
@ -4405,6 +4683,20 @@ components:
required:
- data
title: ListDatasetsResponse
ListFileResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/FileResponse'
description: List of FileResponse entries
additionalProperties: false
required:
- data
title: ListFileResponse
description: >-
Response representing a list of file entries.
ListModelsResponse:
type: object
properties:
@ -5328,17 +5620,6 @@ components:
required:
- results
title: ScoreBatchResponse
SubmitToolResponseMessagesRequest:
type: object
properties:
tool_response_messages:
type: array
items:
$ref: '#/components/schemas/ToolResponseMessage'
additionalProperties: false
required:
- tool_response_messages
title: SubmitToolResponseMessagesRequest
AlgorithmConfig:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
@ -5533,6 +5814,7 @@ tags:
- name: DatasetIO
- name: Datasets
- name: Eval
- name: Files (Coming Soon)
- name: Inference
description: >-
This API provides the raw interface to the underlying models. Two kinds of models
@ -5567,6 +5849,7 @@ x-tagGroups:
- DatasetIO
- Datasets
- Eval
- Files (Coming Soon)
- Inference
- Inspect
- Models


@ -477,6 +477,7 @@ class Generator:
"SyntheticDataGeneration",
"PostTraining",
"BatchInference",
"Files",
]:
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
print(op.defining_class.__name__)
@ -520,8 +521,30 @@ class Generator:
# parameters passed anywhere
parameters = path_parameters + query_parameters
# data passed in payload
if op.request_params:
webmethod = getattr(op.func_ref, "__webmethod__", None)
raw_bytes_request_body = False
if webmethod:
raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False)
# data passed in request body as raw bytes cannot have request parameters
if raw_bytes_request_body and op.request_params:
raise ValueError("Cannot have both raw bytes request body and request parameters")
# data passed in request body as raw bytes
if raw_bytes_request_body:
requestBody = RequestBody(
content={
"application/octet-stream": {
"schema": {
"type": "string",
"format": "binary",
}
}
},
required=True,
)
# data passed in payload as JSON and mapped to request parameters
elif op.request_params:
builder = ContentBuilder(self.schema_builder)
first = next(iter(op.request_params))
request_name, request_type = first


@ -78,7 +78,7 @@ class MediaType:
@dataclass
class RequestBody:
content: Dict[str, MediaType]
content: Dict[str, MediaType | Dict[str, Any]]
description: Optional[str] = None
required: Optional[bool] = None


@ -194,7 +194,7 @@ class AgentTurnResponseEventType(Enum):
turn_start = "turn_start"
turn_complete = "turn_complete"
turn_pending = "turn_pending"
turn_awaiting_input = "turn_awaiting_input"
@json_schema_type
@ -237,8 +237,10 @@ class AgentTurnResponseTurnCompletePayload(BaseModel):
@json_schema_type
class AgentTurnResponseTurnPendingPayload(BaseModel):
event_type: Literal[AgentTurnResponseEventType.turn_pending.value] = AgentTurnResponseEventType.turn_pending.value
class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = (
AgentTurnResponseEventType.turn_awaiting_input.value
)
turn: Turn
@ -250,7 +252,7 @@ AgentTurnResponseEventPayload = register_schema(
AgentTurnResponseStepCompletePayload,
AgentTurnResponseTurnStartPayload,
AgentTurnResponseTurnCompletePayload,
AgentTurnResponseTurnPendingPayload,
AgentTurnResponseTurnAwaitingInputPayload,
],
Field(discriminator="event_type"),
],
@ -344,15 +346,20 @@ class Agents(Protocol):
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/tool_responses",
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue",
method="POST",
)
async def submit_tool_responses(
async def continue_agent_turn(
self,
agent_id: str,
session_id: str,
turn_id: str,
tool_responses: Dict[str, ToolResponseMessage],
new_messages: List[
Union[
UserMessage,
ToolResponseMessage,
]
],
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
@webmethod(
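
A rough client-side sketch of the renamed endpoint follows: it posts a ContinueAgentTurnRequest to /v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue. The httpx usage, base URL, and message content are illustrative assumptions, not part of this change.

# Hypothetical client sketch; httpx and the base URL are assumptions.
import httpx

BASE_URL = "http://localhost:8321"  # assumed llama-stack server address


def continue_turn(agent_id: str, session_id: str, turn_id: str) -> dict:
    url = (
        f"{BASE_URL}/v1/agents/{agent_id}/session/{session_id}"
        f"/turn/{turn_id}/continue"
    )
    # ContinueAgentTurnRequest: `new_messages` is the only required field and
    # accepts UserMessage or ToolResponseMessage entries.
    body = {"new_messages": [{"role": "user", "content": "Please continue."}]}
    resp = httpx.post(url, json=body, timeout=60)
    resp.raise_for_status()
    # A non-streaming response carries a Turn; a streaming client would read
    # text/event-stream chunks instead, where `turn_pending` has been renamed
    # to `turn_awaiting_input`.
    return resp.json()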


@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .files import * # noqa: F401 F403


@ -0,0 +1,174 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List, Optional, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class FileUploadResponse(BaseModel):
"""
Response after initiating a file upload session.
:param id: ID of the upload session
:param url: Upload URL for the file or file parts
:param offset: Upload content offset
:param size: Upload content size
"""
id: str
url: str
offset: int
size: int
@json_schema_type
class BucketResponse(BaseModel):
name: str
@json_schema_type
class ListBucketResponse(BaseModel):
"""
Response representing a list of bucket entries.
:param data: List of BucketResponse entries
"""
data: List[BucketResponse]
@json_schema_type
class FileResponse(BaseModel):
"""
Response representing a file entry.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
:param mime_type: MIME type of the file
:param url: Upload URL for the file contents
:param bytes: Size of the file in bytes
:param created_at: Timestamp of when the file was created
"""
bucket: str
key: str
mime_type: str
url: str
bytes: int
created_at: int
@json_schema_type
class ListFileResponse(BaseModel):
"""
Response representing a list of file entries.
:param data: List of FileResponse entries
"""
data: List[FileResponse]
@runtime_checkable
@trace_protocol
class Files(Protocol):
@webmethod(route="/files", method="POST")
async def create_upload_session(
self,
bucket: str,
key: str,
mime_type: str,
size: int,
) -> FileUploadResponse:
"""
Create a new upload session for a file identified by a bucket and key.
:param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
:param mime_type: MIME type of the file
:param size: File size in bytes
"""
...
@webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
async def upload_content_to_session(
self,
upload_id: str,
) -> Optional[FileResponse]:
"""
Upload file content to an existing upload session.
On the server, the request body will contain the raw bytes that are uploaded.
:param upload_id: ID of the upload session
"""
...
@webmethod(route="/files/session:{upload_id}", method="GET")
async def get_upload_session_info(
self,
upload_id: str,
) -> Optional[FileUploadResponse]:
"""
Returns information about an existing upload session
:param upload_id: ID of the upload session
"""
...
@webmethod(route="/files", method="GET")
async def list_all_buckets(
self,
bucket: str,
) -> ListBucketResponse:
"""
List all buckets.
"""
...
@webmethod(route="/files/{bucket}", method="GET")
async def list_files_in_bucket(
self,
bucket: str,
) -> ListFileResponse:
"""
List all files in a bucket.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="GET")
async def get_file(
self,
bucket: str,
key: str,
) -> FileResponse:
"""
Get file info identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
"""
...
@webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
async def delete_file(
self,
bucket: str,
key: str,
) -> FileResponse:
"""
Delete a file identified by a bucket and key.
:param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
:param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
"""
...
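
To make the intended Files flow concrete, here is a rough client-side sketch chaining the routes defined above: create an upload session, push the raw bytes, then read back the FileResponse. The httpx usage and base URL are illustrative assumptions, not part of this change.

# Hypothetical end-to-end sketch of the flow above; httpx and the base URL
# are assumptions, not part of this change.
from pathlib import Path

import httpx

BASE_URL = "http://localhost:8321"  # assumed llama-stack server address


def upload_file(bucket: str, key: str, path: Path, mime_type: str) -> dict:
    data = path.read_bytes()

    # 1. POST /v1/files -> FileUploadResponse (id, url, offset, size).
    resp = httpx.post(
        f"{BASE_URL}/v1/files",
        json={"bucket": bucket, "key": key, "mime_type": mime_type, "size": len(data)},
    )
    resp.raise_for_status()
    session = resp.json()

    # 2. POST the raw bytes to /v1/files/session:{upload_id}; this is the
    #    application/octet-stream body enabled by raw_bytes_request_body=True.
    #    (The returned session["url"] could equally be used as the target.)
    resp = httpx.post(
        f"{BASE_URL}/v1/files/session:{session['id']}",
        content=data,
        headers={"Content-Type": "application/octet-stream"},
    )
    resp.raise_for_status()

    # 3. GET /v1/files/{bucket}/{key} -> FileResponse for the stored object.
    resp = httpx.get(f"{BASE_URL}/v1/files/{bucket}/{key}")
    resp.raise_for_status()
    return resp.json()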


@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
@ -63,6 +64,7 @@ class LlamaStack(
ToolGroups,
ToolRuntime,
RAGToolRuntime,
Files,
):
pass


@ -169,12 +169,17 @@ class MetaReferenceAgentsImpl(Agents):
async for event in agent.create_and_execute_turn(request):
yield event
async def submit_tool_response_messages(
async def continue_agent_turn(
self,
agent_id: str,
session_id: str,
turn_id: str,
tool_response_messages: List[ToolResponseMessage],
new_messages: List[
Union[
UserMessage,
ToolResponseMessage,
]
],
) -> AsyncGenerator:
pass


@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
)
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
augment_content_with_response_format_prompt,
@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
self.model_registry_helper = ModelRegistryHelper(
[
build_model_alias(
build_hf_repo_model_alias(
llama_model.descriptor(),
llama_model.core_model_id.value,
)


@ -6,19 +6,19 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"meta.llama3-1-8b-instruct-v1:0",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta.llama3-1-70b-instruct-v1:0",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta.llama3-1-405b-instruct-v1:0",
CoreModelId.llama3_1_405b_instruct.value,
),


@ -6,15 +6,15 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
model_aliases = [
build_model_alias(
build_hf_repo_model_alias(
"llama3.1-8b",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama-3.3-70b",
CoreModelId.llama3_3_70b_instruct.value,
),


@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options,
@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import DatabricksImplConfig
model_aliases = [
build_model_alias(
build_hf_repo_model_alias(
"databricks-meta-llama-3-1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"databricks-meta-llama-3-1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),


@ -6,47 +6,47 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-8b",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),


@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_hf_repo_model_alias,
build_model_alias,
build_model_alias_with_just_provider_model_id,
)
from .groq_utils import (
@ -42,19 +42,19 @@ from .groq_utils import (
)
_MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"llama3-8b-8192",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama-3.1-8b-instant",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3-70b-8192",
CoreModelId.llama3_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama-3.3-70b-versatile",
CoreModelId.llama3_3_70b_instruct.value,
),
@ -62,7 +62,7 @@ _MODEL_ALIASES = [
# Preview models aren't recommended for production use, but we include this one
# to pass the test fixture
# TODO(aidand): Replace this with a stable model once Groq supports it
build_model_alias(
build_hf_repo_model_alias(
"llama-3.2-3b-preview",
CoreModelId.llama3_2_3b_instruct.value,
),


@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
_MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"meta/llama3-8b-instruct",
CoreModelId.llama3_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama3-70b-instruct",
CoreModelId.llama3_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),


@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_hf_repo_model_alias,
build_model_alias,
build_model_alias_with_just_provider_model_id,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
log = logging.getLogger(__name__)
model_aliases = [
build_model_alias(
build_hf_repo_model_alias(
"llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.1:8b",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.1:70b-instruct-fp16",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.1:70b",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.1:405b-instruct-fp16",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.1:405b",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.2:1b-instruct-fp16",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.2:1b",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.2:3b-instruct-fp16",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.2:3b",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.2-vision:11b-instruct-fp16",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.2-vision:latest",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.2-vision:90b-instruct-fp16",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias_with_just_provider_model_id(
build_model_alias(
"llama3.2-vision:90b",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama3.3:70b",
CoreModelId.llama3_3_70b_instruct.value,
),
# The Llama Guard models don't have their full fp16 versions
# so we are going to alias their default version to the canonical SKU
build_model_alias(
build_hf_repo_model_alias(
"llama-guard3:8b",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
build_hf_repo_model_alias(
"llama-guard3:1b",
CoreModelId.llama_guard_3_1b.value,
),


@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.1-8B-Instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.1-70B-Instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.1-405B-Instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.2-1B-Instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.2-3B-Instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-3.3-70B-Instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Llama-3.2-11B-Vision-Instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),


@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
log = logging.getLogger(__name__)
def build_model_aliases():
def build_hf_repo_model_aliases():
return [
build_model_alias(
build_hf_repo_model_alias(
model.huggingface_repo,
model.descriptor(),
)
@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
model_id: str
def __init__(self) -> None:
self.register_helper = ModelRegistryHelper(build_model_aliases())
self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.huggingface_repo_to_llama_model_id = {
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
}


@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
build_model_alias,
build_hf_repo_model_alias,
)
MODEL_ALIASES = [
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
CoreModelId.llama3_1_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
CoreModelId.llama3_1_405b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
CoreModelId.llama3_2_3b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
build_model_alias(
build_hf_repo_model_alias(
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
CoreModelId.llama_guard_3_11b_vision.value,
),


@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
build_model_alias,
build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionResponse,
@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
log = logging.getLogger(__name__)
def build_model_aliases():
def build_hf_repo_model_aliases():
return [
build_model_alias(
build_hf_repo_model_alias(
model.huggingface_repo,
model.descriptor(),
)
@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
self.register_helper = ModelRegistryHelper(build_model_aliases())
self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.config = config
self.client = None


@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
@pytest.fixture(scope="session")
def inference_ollama(inference_model) -> ProviderFixture:
inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
if inference_model and "Llama3.1-8B-Instruct" in inference_model:
pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
def inference_ollama() -> ProviderFixture:
return ProviderFixture(
providers=[
Provider(
provider_id="ollama",
provider_type="remote::ollama",
config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(),
config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
)
],
)


@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections import namedtuple
from typing import List, Optional
from pydantic import BaseModel, Field
from llama_stack.apis.models.models import ModelType
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
)
ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
# TODO: this class is more confusing than useful right now. We need to make it
# closer to the Model class.
class ModelAlias(BaseModel):
provider_model_id: str
aliases: List[str] = Field(default_factory=list)
llama_model: Optional[str] = None
model_type: ModelType = ModelType.llm
def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
@ -24,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
return None
def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias(
provider_model_id=provider_model_id,
aliases=[
@ -34,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
)
def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias:
def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias(
provider_model_id=provider_model_id,
aliases=[],
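
For clarity on the rename, a short illustrative sketch: build_hf_repo_model_alias keeps the old build_model_alias behavior (provider model id plus the HuggingFace repo as an alias), while the new build_model_alias replaces the removed build_model_alias_with_just_provider_model_id and carries no extra aliases. The model strings below are examples taken from the Ollama aliases above.

# Illustrative sketch of what the two helpers construct after the rename.
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_alias,
    build_model_alias,
)

# Adds the HuggingFace repo as an alias, so the canonical repo name also
# resolves to the provider-specific model id.
hf_alias = build_hf_repo_model_alias(
    "llama3.1:8b-instruct-fp16",
    CoreModelId.llama3_1_8b_instruct.value,
)

# Formerly build_model_alias_with_just_provider_model_id: only the
# provider-specific model id, no extra aliases.
plain_alias = build_model_alias(
    "llama3.1:8b",
    CoreModelId.llama3_1_8b_instruct.value,
)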


@ -19,6 +19,7 @@ class WebMethod:
request_examples: Optional[List[Any]] = None
response_examples: Optional[List[Any]] = None
method: Optional[str] = None
raw_bytes_request_body: Optional[bool] = False
def webmethod(
@ -27,6 +28,7 @@ def webmethod(
public: Optional[bool] = False,
request_examples: Optional[List[Any]] = None,
response_examples: Optional[List[Any]] = None,
raw_bytes_request_body: Optional[bool] = False,
) -> Callable[[T], T]:
"""
Decorator that supplies additional metadata to an endpoint operation function.
@ -44,6 +46,7 @@ def webmethod(
public=public or False,
request_examples=request_examples,
response_examples=response_examples,
raw_bytes_request_body=raw_bytes_request_body,
)
return cls
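
As a rough sketch of how the new flag is consumed end to end (the Blobs protocol below is hypothetical, not part of this change):

# Minimal sketch of how the new flag travels from the decorator to the
# generator; the Blobs protocol is hypothetical and not part of this change.
from typing import Optional, Protocol

from llama_stack.schema_utils import webmethod


class Blobs(Protocol):
    @webmethod(route="/blobs/{blob_id}", method="POST", raw_bytes_request_body=True)
    async def put_blob(self, blob_id: str) -> Optional[str]: ...


# The OpenAPI generator reads the metadata back via `__webmethod__` and, when
# raw_bytes_request_body is set, emits an application/octet-stream request
# body instead of a JSON schema for the remaining parameters.
assert Blobs.put_blob.__webmethod__.raw_bytes_request_body is True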