This commit is contained in:
Xi Yan 2025-02-20 14:04:25 -08:00
commit 7676756778
24 changed files with 1363 additions and 443 deletions

View file

@ -554,6 +554,67 @@
} }
} }
}, },
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue": {
"post": {
"responses": {
"200": {
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Turn"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
}
}
}
},
"tags": [
"Agents"
],
"description": "",
"parameters": [
{
"name": "agent_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "turn_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ContinueAgentTurnRequest"
}
}
},
"required": true
}
}
},
"/v1/agents": { "/v1/agents": {
"post": { "post": {
"responses": { "responses": {
@ -678,6 +739,65 @@
} }
} }
}, },
"/v1/files": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBucketResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "List all buckets.",
"parameters": [
{
"name": "bucket",
"in": "query",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileUploadResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Create a new upload session for a file identified by a bucket and key.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateUploadSessionRequest"
}
}
},
"required": true
}
}
},
"/v1/agents/{agent_id}": { "/v1/agents/{agent_id}": {
"delete": { "delete": {
"responses": { "responses": {
@ -779,6 +899,84 @@
] ]
} }
}, },
"/v1/files/{bucket}/{key}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Get info for a file identified by a bucket and key.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "key",
"in": "path",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/FileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Delete a file identified by a bucket and key.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "key",
"in": "path",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/inference/embeddings": { "/v1/inference/embeddings": {
"post": { "post": {
"responses": { "responses": {
@ -1470,6 +1668,91 @@
"parameters": [] "parameters": []
} }
}, },
"/v1/files/session:{upload_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/FileUploadResponse"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Returns information about an existing upload session",
"parameters": [
{
"name": "upload_id",
"in": "path",
"description": "ID of the upload session",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/FileResponse"
},
{
"type": "null"
}
]
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.",
"parameters": [
{
"name": "upload_id",
"in": "path",
"description": "ID of the upload session",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/octet-stream": {
"schema": {
"type": "string",
"format": "binary"
}
}
},
"required": true
}
}
},
"/v1/vector-dbs/{vector_db_id}": { "/v1/vector-dbs/{vector_db_id}": {
"get": { "get": {
"responses": { "responses": {
@ -1826,6 +2109,37 @@
} }
} }
}, },
"/v1/files/{bucket}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListFileResponse"
}
}
}
}
},
"tags": [
"Files (Coming Soon)"
],
"description": "List all files in a bucket.",
"parameters": [
{
"name": "bucket",
"in": "path",
"description": "Bucket name (valid chars: a-zA-Z0-9_-)",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/models": { "/v1/models": {
"get": { "get": {
"responses": { "responses": {
@ -2558,67 +2872,6 @@
} }
} }
}, },
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages": {
"post": {
"responses": {
"200": {
"description": "A single turn in an interaction with an Agentic System. **OR** streamed agent turn completion response.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Turn"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
}
}
}
},
"tags": [
"Agents"
],
"description": "",
"parameters": [
{
"name": "agent_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "session_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "turn_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SubmitToolResponseMessagesRequest"
}
}
},
"required": true
}
}
},
"/v1/post-training/supervised-fine-tune": { "/v1/post-training/supervised-fine-tune": {
"post": { "post": {
"responses": { "responses": {
@ -4791,62 +5044,10 @@
"title": "CompletionResponseStreamChunk", "title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response." "description": "A chunk of a streamed completion response."
}, },
"CreateAgentRequest": { "ContinueAgentTurnRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"agent_config": { "new_messages": {
"$ref": "#/components/schemas/AgentConfig"
}
},
"additionalProperties": false,
"required": [
"agent_config"
],
"title": "CreateAgentRequest"
},
"AgentCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
],
"title": "AgentCreateResponse"
},
"CreateAgentSessionRequest": {
"type": "object",
"properties": {
"session_name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_name"
],
"title": "CreateAgentSessionRequest"
},
"AgentSessionCreateResponse": {
"type": "object",
"properties": {
"session_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_id"
],
"title": "AgentSessionCreateResponse"
},
"CreateAgentTurnRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array", "type": "array",
"items": { "items": {
"oneOf": [ "oneOf": [
@ -4858,61 +5059,13 @@
} }
] ]
} }
},
"stream": {
"type": "boolean"
},
"documents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
}
},
{
"$ref": "#/components/schemas/URL"
}
]
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"mime_type"
],
"title": "Document"
}
},
"toolgroups": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
}
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"messages" "new_messages"
], ],
"title": "CreateAgentTurnRequest" "title": "ContinueAgentTurnRequest"
}, },
"InferenceStep": { "InferenceStep": {
"type": "object", "type": "object",
@ -5294,7 +5447,7 @@
"$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload" "$ref": "#/components/schemas/AgentTurnResponseTurnCompletePayload"
}, },
{ {
"$ref": "#/components/schemas/AgentTurnResponseTurnPendingPayload" "$ref": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
} }
], ],
"discriminator": { "discriminator": {
@ -5305,7 +5458,7 @@
"step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload", "step_complete": "#/components/schemas/AgentTurnResponseStepCompletePayload",
"turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload", "turn_start": "#/components/schemas/AgentTurnResponseTurnStartPayload",
"turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload", "turn_complete": "#/components/schemas/AgentTurnResponseTurnCompletePayload",
"turn_pending": "#/components/schemas/AgentTurnResponseTurnPendingPayload" "turn_awaiting_input": "#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload"
} }
} }
}, },
@ -5468,6 +5621,25 @@
"title": "AgentTurnResponseStreamChunk", "title": "AgentTurnResponseStreamChunk",
"description": "streamed agent turn completion response." "description": "streamed agent turn completion response."
}, },
"AgentTurnResponseTurnAwaitingInputPayload": {
"type": "object",
"properties": {
"event_type": {
"type": "string",
"const": "turn_awaiting_input",
"default": "turn_awaiting_input"
},
"turn": {
"$ref": "#/components/schemas/Turn"
}
},
"additionalProperties": false,
"required": [
"event_type",
"turn"
],
"title": "AgentTurnResponseTurnAwaitingInputPayload"
},
"AgentTurnResponseTurnCompletePayload": { "AgentTurnResponseTurnCompletePayload": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -5487,25 +5659,6 @@
], ],
"title": "AgentTurnResponseTurnCompletePayload" "title": "AgentTurnResponseTurnCompletePayload"
}, },
"AgentTurnResponseTurnPendingPayload": {
"type": "object",
"properties": {
"event_type": {
"type": "string",
"const": "turn_pending",
"default": "turn_pending"
},
"turn": {
"$ref": "#/components/schemas/Turn"
}
},
"additionalProperties": false,
"required": [
"event_type",
"turn"
],
"title": "AgentTurnResponseTurnPendingPayload"
},
"AgentTurnResponseTurnStartPayload": { "AgentTurnResponseTurnStartPayload": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -5525,6 +5678,228 @@
], ],
"title": "AgentTurnResponseTurnStartPayload" "title": "AgentTurnResponseTurnStartPayload"
}, },
"CreateAgentRequest": {
"type": "object",
"properties": {
"agent_config": {
"$ref": "#/components/schemas/AgentConfig"
}
},
"additionalProperties": false,
"required": [
"agent_config"
],
"title": "CreateAgentRequest"
},
"AgentCreateResponse": {
"type": "object",
"properties": {
"agent_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"agent_id"
],
"title": "AgentCreateResponse"
},
"CreateAgentSessionRequest": {
"type": "object",
"properties": {
"session_name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_name"
],
"title": "CreateAgentSessionRequest"
},
"AgentSessionCreateResponse": {
"type": "object",
"properties": {
"session_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"session_id"
],
"title": "AgentSessionCreateResponse"
},
"CreateAgentTurnRequest": {
"type": "object",
"properties": {
"messages": {
"type": "array",
"items": {
"oneOf": [
{
"$ref": "#/components/schemas/UserMessage"
},
{
"$ref": "#/components/schemas/ToolResponseMessage"
}
]
}
},
"stream": {
"type": "boolean"
},
"documents": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {
"oneOf": [
{
"type": "string"
},
{
"$ref": "#/components/schemas/InterleavedContentItem"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/InterleavedContentItem"
}
},
{
"$ref": "#/components/schemas/URL"
}
]
},
"mime_type": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"content",
"mime_type"
],
"title": "Document"
}
},
"toolgroups": {
"type": "array",
"items": {
"$ref": "#/components/schemas/AgentTool"
}
},
"tool_config": {
"$ref": "#/components/schemas/ToolConfig"
}
},
"additionalProperties": false,
"required": [
"messages"
],
"title": "CreateAgentTurnRequest"
},
"CreateUploadSessionRequest": {
"type": "object",
"properties": {
"bucket": {
"type": "string",
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
},
"key": {
"type": "string",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
},
"mime_type": {
"type": "string",
"description": "MIME type of the file"
},
"size": {
"type": "integer",
"description": "File size in bytes"
}
},
"additionalProperties": false,
"required": [
"bucket",
"key",
"mime_type",
"size"
],
"title": "CreateUploadSessionRequest"
},
"FileUploadResponse": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "ID of the upload session"
},
"url": {
"type": "string",
"description": "Upload URL for the file or file parts"
},
"offset": {
"type": "integer",
"description": "Upload content offset"
},
"size": {
"type": "integer",
"description": "Upload content size"
}
},
"additionalProperties": false,
"required": [
"id",
"url",
"offset",
"size"
],
"title": "FileUploadResponse",
"description": "Response after initiating a file upload session."
},
"FileResponse": {
"type": "object",
"properties": {
"bucket": {
"type": "string",
"description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
},
"key": {
"type": "string",
"description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
},
"mime_type": {
"type": "string",
"description": "MIME type of the file"
},
"url": {
"type": "string",
"description": "Upload URL for the file contents"
},
"bytes": {
"type": "integer",
"description": "Size of the file in bytes"
},
"created_at": {
"type": "integer",
"description": "Timestamp of when the file was created"
}
},
"additionalProperties": false,
"required": [
"bucket",
"key",
"mime_type",
"url",
"bytes",
"created_at"
],
"title": "FileResponse",
"description": "Response representing a file entry."
},
"EmbeddingsRequest": { "EmbeddingsRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -6840,6 +7215,37 @@
], ],
"title": "ToolInvocationResult" "title": "ToolInvocationResult"
}, },
"BucketResponse": {
"type": "object",
"properties": {
"name": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"name"
],
"title": "BucketResponse"
},
"ListBucketResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/BucketResponse"
},
"description": "List of BucketResponse entries"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListBucketResponse",
"description": "Response representing a list of bucket entries."
},
"ListDatasetsResponse": { "ListDatasetsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -6856,6 +7262,24 @@
], ],
"title": "ListDatasetsResponse" "title": "ListDatasetsResponse"
}, },
"ListFileResponse": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/FileResponse"
},
"description": "List of FileResponse entries"
}
},
"additionalProperties": false,
"required": [
"data"
],
"title": "ListFileResponse",
"description": "Response representing a list of file entries."
},
"ListModelsResponse": { "ListModelsResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -8323,22 +8747,6 @@
], ],
"title": "ScoreBatchResponse" "title": "ScoreBatchResponse"
}, },
"SubmitToolResponseMessagesRequest": {
"type": "object",
"properties": {
"tool_response_messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ToolResponseMessage"
}
}
},
"additionalProperties": false,
"required": [
"tool_response_messages"
],
"title": "SubmitToolResponseMessagesRequest"
},
"AlgorithmConfig": { "AlgorithmConfig": {
"oneOf": [ "oneOf": [
{ {
@ -8643,6 +9051,9 @@
{ {
"name": "Eval" "name": "Eval"
}, },
{
"name": "Files (Coming Soon)"
},
{ {
"name": "Inference", "name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@ -8698,6 +9109,7 @@
"DatasetIO", "DatasetIO",
"Datasets", "Datasets",
"Eval", "Eval",
"Files (Coming Soon)",
"Inference", "Inference",
"Inspect", "Inspect",
"Models", "Models",

View file

@ -329,6 +329,45 @@ paths:
schema: schema:
$ref: '#/components/schemas/CompletionRequest' $ref: '#/components/schemas/CompletionRequest'
required: true required: true
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue:
post:
responses:
'200':
description: >-
A single turn in an interaction with an Agentic System. **OR** streamed
agent turn completion response.
content:
application/json:
schema:
$ref: '#/components/schemas/Turn'
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
tags:
- Agents
description: ''
parameters:
- name: agent_id
in: path
required: true
schema:
type: string
- name: session_id
in: path
required: true
schema:
type: string
- name: turn_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ContinueAgentTurnRequest'
required: true
/v1/agents: /v1/agents:
post: post:
responses: responses:
@ -406,6 +445,43 @@ paths:
schema: schema:
$ref: '#/components/schemas/CreateAgentTurnRequest' $ref: '#/components/schemas/CreateAgentTurnRequest'
required: true required: true
/v1/files:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListBucketResponse'
tags:
- Files (Coming Soon)
description: List all buckets.
parameters:
- name: bucket
in: query
required: true
schema:
type: string
post:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileUploadResponse'
tags:
- Files (Coming Soon)
description: >-
Create a new upload session for a file identified by a bucket and key.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateUploadSessionRequest'
required: true
/v1/agents/{agent_id}: /v1/agents/{agent_id}:
delete: delete:
responses: responses:
@ -468,6 +544,59 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/files/{bucket}/{key}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
tags:
- Files (Coming Soon)
description: >-
Get info for a file identified by a bucket and key.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
- name: key
in: path
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
required: true
schema:
type: string
delete:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/FileResponse'
tags:
- Files (Coming Soon)
description: >-
Delete a file identified by a bucket and key.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
- name: key
in: path
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
required: true
schema:
type: string
/v1/inference/embeddings: /v1/inference/embeddings:
post: post:
responses: responses:
@ -875,6 +1004,57 @@ paths:
- PostTraining (Coming Soon) - PostTraining (Coming Soon)
description: '' description: ''
parameters: [] parameters: []
/v1/files/session:{upload_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/FileUploadResponse'
- type: 'null'
tags:
- Files (Coming Soon)
description: >-
Returns information about an existing upload session
parameters:
- name: upload_id
in: path
description: ID of the upload session
required: true
schema:
type: string
post:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/FileResponse'
- type: 'null'
tags:
- Files (Coming Soon)
description: >-
Upload file content to an existing upload session. On the server, request
body will have the raw bytes that are uploaded.
parameters:
- name: upload_id
in: path
description: ID of the upload session
required: true
schema:
type: string
requestBody:
content:
application/octet-stream:
schema:
type: string
format: binary
required: true
/v1/vector-dbs/{vector_db_id}: /v1/vector-dbs/{vector_db_id}:
get: get:
responses: responses:
@ -1091,6 +1271,25 @@ paths:
schema: schema:
$ref: '#/components/schemas/RegisterDatasetRequest' $ref: '#/components/schemas/RegisterDatasetRequest'
required: true required: true
/v1/files/{bucket}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ListFileResponse'
tags:
- Files (Coming Soon)
description: List all files in a bucket.
parameters:
- name: bucket
in: path
description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
required: true
schema:
type: string
/v1/models: /v1/models:
get: get:
responses: responses:
@ -1543,45 +1742,6 @@ paths:
schema: schema:
$ref: '#/components/schemas/ScoreBatchRequest' $ref: '#/components/schemas/ScoreBatchRequest'
required: true required: true
/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/submit_tool_response_messages:
post:
responses:
'200':
description: >-
A single turn in an interaction with an Agentic System. **OR** streamed
agent turn completion response.
content:
application/json:
schema:
$ref: '#/components/schemas/Turn'
text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
tags:
- Agents
description: ''
parameters:
- name: agent_id
in: path
required: true
schema:
type: string
- name: session_id
in: path
required: true
schema:
type: string
- name: turn_id
in: path
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SubmitToolResponseMessagesRequest'
required: true
/v1/post-training/supervised-fine-tune: /v1/post-training/supervised-fine-tune:
post: post:
responses: responses:
@ -3080,83 +3240,19 @@ components:
title: CompletionResponseStreamChunk title: CompletionResponseStreamChunk
description: >- description: >-
A chunk of a streamed completion response. A chunk of a streamed completion response.
CreateAgentRequest: ContinueAgentTurnRequest:
type: object type: object
properties: properties:
agent_config: new_messages:
$ref: '#/components/schemas/AgentConfig'
additionalProperties: false
required:
- agent_config
title: CreateAgentRequest
AgentCreateResponse:
type: object
properties:
agent_id:
type: string
additionalProperties: false
required:
- agent_id
title: AgentCreateResponse
CreateAgentSessionRequest:
type: object
properties:
session_name:
type: string
additionalProperties: false
required:
- session_name
title: CreateAgentSessionRequest
AgentSessionCreateResponse:
type: object
properties:
session_id:
type: string
additionalProperties: false
required:
- session_id
title: AgentSessionCreateResponse
CreateAgentTurnRequest:
type: object
properties:
messages:
type: array type: array
items: items:
oneOf: oneOf:
- $ref: '#/components/schemas/UserMessage' - $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage' - $ref: '#/components/schemas/ToolResponseMessage'
stream:
type: boolean
documents:
type: array
items:
type: object
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
mime_type:
type: string
additionalProperties: false
required:
- content
- mime_type
title: Document
toolgroups:
type: array
items:
$ref: '#/components/schemas/AgentTool'
tool_config:
$ref: '#/components/schemas/ToolConfig'
additionalProperties: false additionalProperties: false
required: required:
- messages - new_messages
title: CreateAgentTurnRequest title: ContinueAgentTurnRequest
InferenceStep: InferenceStep:
type: object type: object
properties: properties:
@ -3405,7 +3501,7 @@ components:
- $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload' - $ref: '#/components/schemas/AgentTurnResponseStepCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload' - $ref: '#/components/schemas/AgentTurnResponseTurnStartPayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload' - $ref: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
- $ref: '#/components/schemas/AgentTurnResponseTurnPendingPayload' - $ref: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
discriminator: discriminator:
propertyName: event_type propertyName: event_type
mapping: mapping:
@ -3414,7 +3510,7 @@ components:
step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload' step_complete: '#/components/schemas/AgentTurnResponseStepCompletePayload'
turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload' turn_start: '#/components/schemas/AgentTurnResponseTurnStartPayload'
turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload' turn_complete: '#/components/schemas/AgentTurnResponseTurnCompletePayload'
turn_pending: '#/components/schemas/AgentTurnResponseTurnPendingPayload' turn_awaiting_input: '#/components/schemas/AgentTurnResponseTurnAwaitingInputPayload'
AgentTurnResponseStepCompletePayload: AgentTurnResponseStepCompletePayload:
type: object type: object
properties: properties:
@ -3521,6 +3617,21 @@ components:
- event - event
title: AgentTurnResponseStreamChunk title: AgentTurnResponseStreamChunk
description: streamed agent turn completion response. description: streamed agent turn completion response.
"AgentTurnResponseTurnAwaitingInputPayload":
type: object
properties:
event_type:
type: string
const: turn_awaiting_input
default: turn_awaiting_input
turn:
$ref: '#/components/schemas/Turn'
additionalProperties: false
required:
- event_type
- turn
title: >-
AgentTurnResponseTurnAwaitingInputPayload
AgentTurnResponseTurnCompletePayload: AgentTurnResponseTurnCompletePayload:
type: object type: object
properties: properties:
@ -3535,20 +3646,6 @@ components:
- event_type - event_type
- turn - turn
title: AgentTurnResponseTurnCompletePayload title: AgentTurnResponseTurnCompletePayload
AgentTurnResponseTurnPendingPayload:
type: object
properties:
event_type:
type: string
const: turn_pending
default: turn_pending
turn:
$ref: '#/components/schemas/Turn'
additionalProperties: false
required:
- event_type
- turn
title: AgentTurnResponseTurnPendingPayload
AgentTurnResponseTurnStartPayload: AgentTurnResponseTurnStartPayload:
type: object type: object
properties: properties:
@ -3563,6 +3660,164 @@ components:
- event_type - event_type
- turn_id - turn_id
title: AgentTurnResponseTurnStartPayload title: AgentTurnResponseTurnStartPayload
CreateAgentRequest:
type: object
properties:
agent_config:
$ref: '#/components/schemas/AgentConfig'
additionalProperties: false
required:
- agent_config
title: CreateAgentRequest
AgentCreateResponse:
type: object
properties:
agent_id:
type: string
additionalProperties: false
required:
- agent_id
title: AgentCreateResponse
CreateAgentSessionRequest:
type: object
properties:
session_name:
type: string
additionalProperties: false
required:
- session_name
title: CreateAgentSessionRequest
AgentSessionCreateResponse:
type: object
properties:
session_id:
type: string
additionalProperties: false
required:
- session_id
title: AgentSessionCreateResponse
CreateAgentTurnRequest:
type: object
properties:
messages:
type: array
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
stream:
type: boolean
documents:
type: array
items:
type: object
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/InterleavedContentItem'
- type: array
items:
$ref: '#/components/schemas/InterleavedContentItem'
- $ref: '#/components/schemas/URL'
mime_type:
type: string
additionalProperties: false
required:
- content
- mime_type
title: Document
toolgroups:
type: array
items:
$ref: '#/components/schemas/AgentTool'
tool_config:
$ref: '#/components/schemas/ToolConfig'
additionalProperties: false
required:
- messages
title: CreateAgentTurnRequest
CreateUploadSessionRequest:
type: object
properties:
bucket:
type: string
description: >-
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
key:
type: string
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
mime_type:
type: string
description: MIME type of the file
size:
type: integer
description: File size in bytes
additionalProperties: false
required:
- bucket
- key
- mime_type
- size
title: CreateUploadSessionRequest
FileUploadResponse:
type: object
properties:
id:
type: string
description: ID of the upload session
url:
type: string
description: Upload URL for the file or file parts
offset:
type: integer
description: Upload content offset
size:
type: integer
description: Upload content size
additionalProperties: false
required:
- id
- url
- offset
- size
title: FileUploadResponse
description: >-
Response after initiating a file upload session.
FileResponse:
type: object
properties:
bucket:
type: string
description: >-
Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
key:
type: string
description: >-
Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
mime_type:
type: string
description: MIME type of the file
url:
type: string
description: Upload URL for the file contents
bytes:
type: integer
description: Size of the file in bytes
created_at:
type: integer
description: Timestamp of when the file was created
additionalProperties: false
required:
- bucket
- key
- mime_type
- url
- bytes
- created_at
title: FileResponse
description: Response representing a file entry.
EmbeddingsRequest: EmbeddingsRequest:
type: object type: object
properties: properties:
@ -4394,6 +4649,29 @@ components:
required: required:
- content - content
title: ToolInvocationResult title: ToolInvocationResult
BucketResponse:
type: object
properties:
name:
type: string
additionalProperties: false
required:
- name
title: BucketResponse
ListBucketResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/BucketResponse'
description: List of BucketResponse entries
additionalProperties: false
required:
- data
title: ListBucketResponse
description: >-
Response representing a list of bucket entries.
ListDatasetsResponse: ListDatasetsResponse:
type: object type: object
properties: properties:
@ -4405,6 +4683,20 @@ components:
required: required:
- data - data
title: ListDatasetsResponse title: ListDatasetsResponse
ListFileResponse:
type: object
properties:
data:
type: array
items:
$ref: '#/components/schemas/FileResponse'
description: List of FileResponse entries
additionalProperties: false
required:
- data
title: ListFileResponse
description: >-
Response representing a list of file entries.
ListModelsResponse: ListModelsResponse:
type: object type: object
properties: properties:
@ -5328,17 +5620,6 @@ components:
required: required:
- results - results
title: ScoreBatchResponse title: ScoreBatchResponse
SubmitToolResponseMessagesRequest:
type: object
properties:
tool_response_messages:
type: array
items:
$ref: '#/components/schemas/ToolResponseMessage'
additionalProperties: false
required:
- tool_response_messages
title: SubmitToolResponseMessagesRequest
AlgorithmConfig: AlgorithmConfig:
oneOf: oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig' - $ref: '#/components/schemas/LoraFinetuningConfig'
@ -5533,6 +5814,7 @@ tags:
- name: DatasetIO - name: DatasetIO
- name: Datasets - name: Datasets
- name: Eval - name: Eval
- name: Files (Coming Soon)
- name: Inference - name: Inference
description: >- description: >-
This API provides the raw interface to the underlying models. Two kinds of models This API provides the raw interface to the underlying models. Two kinds of models
@ -5567,6 +5849,7 @@ x-tagGroups:
- DatasetIO - DatasetIO
- Datasets - Datasets
- Eval - Eval
- Files (Coming Soon)
- Inference - Inference
- Inspect - Inspect
- Models - Models

View file

@ -477,6 +477,7 @@ class Generator:
"SyntheticDataGeneration", "SyntheticDataGeneration",
"PostTraining", "PostTraining",
"BatchInference", "BatchInference",
"Files",
]: ]:
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)" op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
print(op.defining_class.__name__) print(op.defining_class.__name__)
@ -520,8 +521,30 @@ class Generator:
# parameters passed anywhere # parameters passed anywhere
parameters = path_parameters + query_parameters parameters = path_parameters + query_parameters
# data passed in payload webmethod = getattr(op.func_ref, "__webmethod__", None)
if op.request_params: raw_bytes_request_body = False
if webmethod:
raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False)
# data passed in request body as raw bytes cannot have request parameters
if raw_bytes_request_body and op.request_params:
raise ValueError("Cannot have both raw bytes request body and request parameters")
# data passed in request body as raw bytes
if raw_bytes_request_body:
requestBody = RequestBody(
content={
"application/octet-stream": {
"schema": {
"type": "string",
"format": "binary",
}
}
},
required=True,
)
# data passed in payload as JSON and mapped to request parameters
elif op.request_params:
builder = ContentBuilder(self.schema_builder) builder = ContentBuilder(self.schema_builder)
first = next(iter(op.request_params)) first = next(iter(op.request_params))
request_name, request_type = first request_name, request_type = first

View file

@ -78,7 +78,7 @@ class MediaType:
@dataclass @dataclass
class RequestBody: class RequestBody:
content: Dict[str, MediaType] content: Dict[str, MediaType | Dict[str, Any]]
description: Optional[str] = None description: Optional[str] = None
required: Optional[bool] = None required: Optional[bool] = None

View file

@ -194,7 +194,7 @@ class AgentTurnResponseEventType(Enum):
turn_start = "turn_start" turn_start = "turn_start"
turn_complete = "turn_complete" turn_complete = "turn_complete"
turn_pending = "turn_pending" turn_awaiting_input = "turn_awaiting_input"
@json_schema_type @json_schema_type
@ -237,8 +237,10 @@ class AgentTurnResponseTurnCompletePayload(BaseModel):
@json_schema_type @json_schema_type
class AgentTurnResponseTurnPendingPayload(BaseModel): class AgentTurnResponseTurnAwaitingInputPayload(BaseModel):
event_type: Literal[AgentTurnResponseEventType.turn_pending.value] = AgentTurnResponseEventType.turn_pending.value event_type: Literal[AgentTurnResponseEventType.turn_awaiting_input.value] = (
AgentTurnResponseEventType.turn_awaiting_input.value
)
turn: Turn turn: Turn
@ -250,7 +252,7 @@ AgentTurnResponseEventPayload = register_schema(
AgentTurnResponseStepCompletePayload, AgentTurnResponseStepCompletePayload,
AgentTurnResponseTurnStartPayload, AgentTurnResponseTurnStartPayload,
AgentTurnResponseTurnCompletePayload, AgentTurnResponseTurnCompletePayload,
AgentTurnResponseTurnPendingPayload, AgentTurnResponseTurnAwaitingInputPayload,
], ],
Field(discriminator="event_type"), Field(discriminator="event_type"),
], ],
@ -344,15 +346,20 @@ class Agents(Protocol):
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ... ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/tool_responses", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/continue",
method="POST", method="POST",
) )
async def submit_tool_responses( async def continue_agent_turn(
self, self,
agent_id: str, agent_id: str,
session_id: str, session_id: str,
turn_id: str, turn_id: str,
tool_responses: Dict[str, ToolResponseMessage], new_messages: List[
Union[
UserMessage,
ToolResponseMessage,
]
],
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ... ) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]: ...
@webmethod( @webmethod(

View file

@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .files import * # noqa: F401 F403

View file

@ -0,0 +1,174 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import List, Optional, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class FileUploadResponse(BaseModel):
    """
    Response after initiating a file upload session.

    :param id: ID of the upload session
    :param url: Upload URL for the file or file parts
    :param offset: Upload content offset
    :param size: Upload content size
    """

    # NOTE(review): the :param lines above match, word for word, the field
    # descriptions of FileUploadResponse in the generated OpenAPI spec, so they
    # presumably feed the spec generator -- keep any rewording in sync.
    id: str
    url: str
    offset: int
    size: int
# Schema model for a single bucket entry; `name` is its only field.
# It is the item type of ListBucketResponse.data.
@json_schema_type
class BucketResponse(BaseModel):
    name: str
@json_schema_type
class ListBucketResponse(BaseModel):
    """
    Response representing a list of buckets.

    :param data: List of BucketResponse entries
    """

    data: List[BucketResponse]
@json_schema_type
class FileResponse(BaseModel):
    """
    Response representing a file entry.

    :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
    :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
    :param mime_type: MIME type of the file
    :param url: Upload URL for the file contents
    :param bytes: Size of the file in bytes
    :param created_at: Timestamp of when the file was created
    """

    # NOTE(review): `url` is documented as an "Upload URL" although this model
    # describes a file entry -- confirm whether it is actually a
    # download/access URL and adjust the wording if so.
    # NOTE(review): `created_at` is an integer timestamp; its unit/epoch is not
    # stated anywhere in this module -- confirm before relying on it.
    bucket: str
    key: str
    mime_type: str
    url: str
    bytes: int
    created_at: int
@json_schema_type
class ListFileResponse(BaseModel):
    """
    Response representing a list of file entries.

    :param data: List of FileResponse entries
    """

    # Envelope model: the file entries are wrapped under a single `data` key.
    data: List[FileResponse]
# Protocol describing the Files API surface: upload sessions, bucket listing,
# and per-file lookup/deletion. Every method is a stub (`...`); its route and
# HTTP verb are declared through the @webmethod decorator.
@runtime_checkable
@trace_protocol
class Files(Protocol):
    @webmethod(route="/files", method="POST")
    async def create_upload_session(
        self,
        bucket: str,
        key: str,
        mime_type: str,
        size: int,
    ) -> FileUploadResponse:
        """
        Create a new upload session for a file identified by a bucket and key.

        :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
        :param mime_type: MIME type of the file
        :param size: File size in bytes
        """
        ...

    # raw_bytes_request_body=True marks this route as taking the upload payload
    # as the raw request body rather than as JSON-mapped parameters, which is
    # why `upload_id` is the only declared argument.
    @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
    async def upload_content_to_session(
        self,
        upload_id: str,
    ) -> Optional[FileResponse]:
        """
        Upload file content to an existing upload session.
        On the server, request body will have the raw bytes that are uploaded.

        :param upload_id: ID of the upload session
        """
        ...

    @webmethod(route="/files/session:{upload_id}", method="GET")
    async def get_upload_session_info(
        self,
        upload_id: str,
    ) -> Optional[FileUploadResponse]:
        """
        Returns information about an existing upload session

        :param upload_id: ID of the upload session
        """
        ...

    # NOTE(review): this operation is described as listing *all* buckets, yet
    # it takes a required `bucket` argument that the docstring does not
    # document -- confirm whether the parameter is intentional.
    @webmethod(route="/files", method="GET")
    async def list_all_buckets(
        self,
        bucket: str,
    ) -> ListBucketResponse:
        """
        List all buckets.
        """
        ...

    @webmethod(route="/files/{bucket}", method="GET")
    async def list_files_in_bucket(
        self,
        bucket: str,
    ) -> ListFileResponse:
        """
        List all files in a bucket.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
        """
        ...

    # NOTE(review): `{key:path}` presumably lets the key segment span '/'
    # characters (the documented key alphabet includes '/') -- confirm against
    # the router in use.
    @webmethod(route="/files/{bucket}/{key:path}", method="GET")
    async def get_file(
        self,
        bucket: str,
        key: str,
    ) -> FileResponse:
        """
        Get a file info identified by a bucket and key.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
        """
        ...

    @webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
    async def delete_file(
        self,
        bucket: str,
        key: str,
    ) -> FileResponse:
        """
        Delete a file identified by a bucket and key.

        :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
        :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
        """
        ...

View file

@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models from llama_stack.apis.models import Models
@ -63,6 +64,7 @@ class LlamaStack(
ToolGroups, ToolGroups,
ToolRuntime, ToolRuntime,
RAGToolRuntime, RAGToolRuntime,
Files,
): ):
pass pass

View file

@ -169,12 +169,17 @@ class MetaReferenceAgentsImpl(Agents):
async for event in agent.create_and_execute_turn(request): async for event in agent.create_and_execute_turn(request):
yield event yield event
async def submit_tool_response_messages( async def continue_agent_turn(
self, self,
agent_id: str, agent_id: str,
session_id: str, session_id: str,
turn_id: str, turn_id: str,
tool_response_messages: List[ToolResponseMessage], new_messages: List[
Union[
UserMessage,
ToolResponseMessage,
]
],
) -> AsyncGenerator: ) -> AsyncGenerator:
pass pass

View file

@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
) )
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_model_alias, build_hf_repo_model_alias,
) )
from llama_stack.providers.utils.inference.prompt_adapter import ( from llama_stack.providers.utils.inference.prompt_adapter import (
augment_content_with_response_format_prompt, augment_content_with_response_format_prompt,
@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
self.model_registry_helper = ModelRegistryHelper( self.model_registry_helper = ModelRegistryHelper(
[ [
build_model_alias( build_hf_repo_model_alias(
llama_model.descriptor(), llama_model.descriptor(),
llama_model.core_model_id.value, llama_model.core_model_id.value,
) )

View file

@ -6,19 +6,19 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
MODEL_ALIASES = [ MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"meta.llama3-1-8b-instruct-v1:0", "meta.llama3-1-8b-instruct-v1:0",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta.llama3-1-70b-instruct-v1:0", "meta.llama3-1-70b-instruct-v1:0",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta.llama3-1-405b-instruct-v1:0", "meta.llama3-1-405b-instruct-v1:0",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),

View file

@ -6,15 +6,15 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
model_aliases = [ model_aliases = [
build_model_alias( build_hf_repo_model_alias(
"llama3.1-8b", "llama3.1-8b",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama-3.3-70b", "llama-3.3-70b",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),

View file

@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_model_alias, build_hf_repo_model_alias,
) )
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options, get_sampling_options,
@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import DatabricksImplConfig from .config import DatabricksImplConfig
model_aliases = [ model_aliases = [
build_model_alias( build_hf_repo_model_alias(
"databricks-meta-llama-3-1-70b-instruct", "databricks-meta-llama-3-1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"databricks-meta-llama-3-1-405b-instruct", "databricks-meta-llama-3-1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),

View file

@ -6,47 +6,47 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
MODEL_ALIASES = [ MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-8b-instruct", "accounts/fireworks/models/llama-v3p1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-70b-instruct", "accounts/fireworks/models/llama-v3p1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-405b-instruct", "accounts/fireworks/models/llama-v3p1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-1b-instruct", "accounts/fireworks/models/llama-v3p2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value, CoreModelId.llama3_2_1b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-3b-instruct", "accounts/fireworks/models/llama-v3p2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct", "accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct", "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p3-70b-instruct", "accounts/fireworks/models/llama-v3p3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-8b", "accounts/fireworks/models/llama-guard-3-8b",
CoreModelId.llama_guard_3_8b.value, CoreModelId.llama_guard_3_8b.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-11b-vision", "accounts/fireworks/models/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value, CoreModelId.llama_guard_3_11b_vision.value,
), ),

View file

@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_hf_repo_model_alias,
build_model_alias, build_model_alias,
build_model_alias_with_just_provider_model_id,
) )
from .groq_utils import ( from .groq_utils import (
@ -42,19 +42,19 @@ from .groq_utils import (
) )
_MODEL_ALIASES = [ _MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"llama3-8b-8192", "llama3-8b-8192",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama-3.1-8b-instant", "llama-3.1-8b-instant",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3-70b-8192", "llama3-70b-8192",
CoreModelId.llama3_70b_instruct.value, CoreModelId.llama3_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama-3.3-70b-versatile", "llama-3.3-70b-versatile",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
@ -62,7 +62,7 @@ _MODEL_ALIASES = [
# Preview models aren't recommended for production use, but we include this one # Preview models aren't recommended for production use, but we include this one
# to pass the test fixture # to pass the test fixture
# TODO(aidand): Replace this with a stable model once Groq supports it # TODO(aidand): Replace this with a stable model once Groq supports it
build_model_alias( build_hf_repo_model_alias(
"llama-3.2-3b-preview", "llama-3.2-3b-preview",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),

View file

@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
_MODEL_ALIASES = [ _MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"meta/llama3-8b-instruct", "meta/llama3-8b-instruct",
CoreModelId.llama3_8b_instruct.value, CoreModelId.llama3_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama3-70b-instruct", "meta/llama3-70b-instruct",
CoreModelId.llama3_70b_instruct.value, CoreModelId.llama3_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.1-8b-instruct", "meta/llama-3.1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.1-70b-instruct", "meta/llama-3.1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.1-405b-instruct", "meta/llama-3.1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.2-1b-instruct", "meta/llama-3.2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value, CoreModelId.llama3_2_1b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.2-3b-instruct", "meta/llama-3.2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.2-11b-vision-instruct", "meta/llama-3.2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta/llama-3.2-90b-vision-instruct", "meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),

View file

@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_hf_repo_model_alias,
build_model_alias, build_model_alias,
build_model_alias_with_just_provider_model_id,
) )
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice, OpenAICompatCompletionChoice,
@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
model_aliases = [ model_aliases = [
build_model_alias( build_hf_repo_model_alias(
"llama3.1:8b-instruct-fp16", "llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.1:8b", "llama3.1:8b",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.1:70b-instruct-fp16", "llama3.1:70b-instruct-fp16",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.1:70b", "llama3.1:70b",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.1:405b-instruct-fp16", "llama3.1:405b-instruct-fp16",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.1:405b", "llama3.1:405b",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.2:1b-instruct-fp16", "llama3.2:1b-instruct-fp16",
CoreModelId.llama3_2_1b_instruct.value, CoreModelId.llama3_2_1b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.2:1b", "llama3.2:1b",
CoreModelId.llama3_2_1b_instruct.value, CoreModelId.llama3_2_1b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.2:3b-instruct-fp16", "llama3.2:3b-instruct-fp16",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.2:3b", "llama3.2:3b",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.2-vision:11b-instruct-fp16", "llama3.2-vision:11b-instruct-fp16",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.2-vision:latest", "llama3.2-vision:latest",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.2-vision:90b-instruct-fp16", "llama3.2-vision:90b-instruct-fp16",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias_with_just_provider_model_id( build_model_alias(
"llama3.2-vision:90b", "llama3.2-vision:90b",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama3.3:70b", "llama3.3:70b",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
# The Llama Guard models don't have their full fp16 versions # The Llama Guard models don't have their full fp16 versions
# so we are going to alias their default version to the canonical SKU # so we are going to alias their default version to the canonical SKU
build_model_alias( build_hf_repo_model_alias(
"llama-guard3:8b", "llama-guard3:8b",
CoreModelId.llama_guard_3_8b.value, CoreModelId.llama_guard_3_8b.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"llama-guard3:1b", "llama-guard3:1b",
CoreModelId.llama_guard_3_1b.value, CoreModelId.llama_guard_3_1b.value,
), ),

View file

@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
MODEL_ALIASES = [ MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.1-8B-Instruct", "Meta-Llama-3.1-8B-Instruct",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.1-70B-Instruct", "Meta-Llama-3.1-70B-Instruct",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.1-405B-Instruct", "Meta-Llama-3.1-405B-Instruct",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.2-1B-Instruct", "Meta-Llama-3.2-1B-Instruct",
CoreModelId.llama3_2_1b_instruct.value, CoreModelId.llama3_2_1b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.2-3B-Instruct", "Meta-Llama-3.2-3B-Instruct",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-3.3-70B-Instruct", "Meta-Llama-3.3-70B-Instruct",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Llama-3.2-11B-Vision-Instruct", "Llama-3.2-11B-Vision-Instruct",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Llama-3.2-90B-Vision-Instruct", "Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"Meta-Llama-Guard-3-8B", "Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value, CoreModelId.llama_guard_3_8b.value,
), ),

View file

@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_model_alias, build_hf_repo_model_alias,
) )
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice, OpenAICompatCompletionChoice,
@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def build_model_aliases(): def build_hf_repo_model_aliases():
return [ return [
build_model_alias( build_hf_repo_model_alias(
model.huggingface_repo, model.huggingface_repo,
model.descriptor(), model.descriptor(),
) )
@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
model_id: str model_id: str
def __init__(self) -> None: def __init__(self) -> None:
self.register_helper = ModelRegistryHelper(build_model_aliases()) self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.huggingface_repo_to_llama_model_id = { self.huggingface_repo_to_llama_model_id = {
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
} }

View file

@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
build_model_alias, build_hf_repo_model_alias,
) )
MODEL_ALIASES = [ MODEL_ALIASES = [
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value, CoreModelId.llama3_1_8b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
CoreModelId.llama3_1_70b_instruct.value, CoreModelId.llama3_1_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
CoreModelId.llama3_1_405b_instruct.value, CoreModelId.llama3_1_405b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Llama-3.2-3B-Instruct-Turbo", "meta-llama/Llama-3.2-3B-Instruct-Turbo",
CoreModelId.llama3_2_3b_instruct.value, CoreModelId.llama3_2_3b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo", "meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_11b_vision_instruct.value, CoreModelId.llama3_2_11b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo", "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_90b_vision_instruct.value, CoreModelId.llama3_2_90b_vision_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Llama-3.3-70B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value, CoreModelId.llama3_3_70b_instruct.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Meta-Llama-Guard-3-8B", "meta-llama/Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value, CoreModelId.llama_guard_3_8b.value,
), ),
build_model_alias( build_hf_repo_model_alias(
"meta-llama/Llama-Guard-3-11B-Vision-Turbo", "meta-llama/Llama-Guard-3-11B-Vision-Turbo",
CoreModelId.llama_guard_3_11b_vision.value, CoreModelId.llama_guard_3_11b_vision.value,
), ),

View file

@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ( from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper, ModelRegistryHelper,
build_model_alias, build_hf_repo_model_alias,
) )
from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionResponse, OpenAICompatCompletionResponse,
@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
def build_model_aliases(): def build_hf_repo_model_aliases():
return [ return [
build_model_alias( build_hf_repo_model_alias(
model.huggingface_repo, model.huggingface_repo,
model.descriptor(), model.descriptor(),
) )
@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
def __init__(self, config: VLLMInferenceAdapterConfig) -> None: def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
self.register_helper = ModelRegistryHelper(build_model_aliases()) self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.config = config self.config = config
self.client = None self.client = None

View file

@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def inference_ollama(inference_model) -> ProviderFixture: def inference_ollama() -> ProviderFixture:
inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
if inference_model and "Llama3.1-8B-Instruct" in inference_model:
pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
return ProviderFixture( return ProviderFixture(
providers=[ providers=[
Provider( Provider(
provider_id="ollama", provider_id="ollama",
provider_type="remote::ollama", provider_type="remote::ollama",
config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(), config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
) )
], ],
) )

View file

@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections import namedtuple
from typing import List, Optional from typing import List, Optional
from pydantic import BaseModel, Field
from llama_stack.apis.models.models import ModelType from llama_stack.apis.models.models import ModelType
from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
) )
ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
# TODO: this class is more confusing than useful right now. We need to make it
# closer to the Model class.
class ModelAlias(BaseModel):
provider_model_id: str
aliases: List[str] = Field(default_factory=list)
llama_model: Optional[str] = None
model_type: ModelType = ModelType.llm
def get_huggingface_repo(model_descriptor: str) -> Optional[str]: def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
@ -24,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
return None return None
def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias: def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias( return ModelAlias(
provider_model_id=provider_model_id, provider_model_id=provider_model_id,
aliases=[ aliases=[
@ -34,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
) )
def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias: def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias( return ModelAlias(
provider_model_id=provider_model_id, provider_model_id=provider_model_id,
aliases=[], aliases=[],

View file

@ -19,6 +19,7 @@ class WebMethod:
request_examples: Optional[List[Any]] = None request_examples: Optional[List[Any]] = None
response_examples: Optional[List[Any]] = None response_examples: Optional[List[Any]] = None
method: Optional[str] = None method: Optional[str] = None
raw_bytes_request_body: Optional[bool] = False
def webmethod( def webmethod(
@ -27,6 +28,7 @@ def webmethod(
public: Optional[bool] = False, public: Optional[bool] = False,
request_examples: Optional[List[Any]] = None, request_examples: Optional[List[Any]] = None,
response_examples: Optional[List[Any]] = None, response_examples: Optional[List[Any]] = None,
raw_bytes_request_body: Optional[bool] = False,
) -> Callable[[T], T]: ) -> Callable[[T], T]:
""" """
Decorator that supplies additional metadata to an endpoint operation function. Decorator that supplies additional metadata to an endpoint operation function.
@ -44,6 +46,7 @@ def webmethod(
public=public or False, public=public or False,
request_examples=request_examples, request_examples=request_examples,
response_examples=response_examples, response_examples=response_examples,
raw_bytes_request_body=raw_bytes_request_body,
) )
return cls return cls