diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 829c90fc4..9a38ed692 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -678,6 +678,65 @@
}
}
},
+ "/v1/files": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListBucketResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "List all buckets.",
+ "parameters": [
+ {
+ "name": "bucket",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/FileUploadResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "Create a new upload session for a file identified by a bucket and key.",
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateUploadSessionRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/agents/{agent_id}": {
"delete": {
"responses": {
@@ -779,6 +838,84 @@
]
}
},
+ "/v1/files/{bucket}/{key}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/FileResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "Get a file info identified by a bucket and key.",
+ "parameters": [
+ {
+ "name": "bucket",
+ "in": "path",
+ "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "key",
+ "in": "path",
+ "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/FileResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "Delete a file identified by a bucket and key.",
+ "parameters": [
+ {
+ "name": "bucket",
+ "in": "path",
+ "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "key",
+ "in": "path",
+ "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/inference/embeddings": {
"post": {
"responses": {
@@ -1470,6 +1607,91 @@
"parameters": []
}
},
+ "/v1/files/session:{upload_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/FileUploadResponse"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "Returns information about an existsing upload session",
+ "parameters": [
+ {
+ "name": "upload_id",
+ "in": "path",
+ "description": "ID of the upload session",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/FileResponse"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.",
+ "parameters": [
+ {
+ "name": "upload_id",
+ "in": "path",
+ "description": "ID of the upload session",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/octet-stream": {
+ "schema": {
+ "type": "string",
+ "format": "binary"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/vector-dbs/{vector_db_id}": {
"get": {
"responses": {
@@ -1826,6 +2048,37 @@
}
}
},
+ "/v1/files/{bucket}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ListFileResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Files (Coming Soon)"
+ ],
+ "description": "List all files in a bucket.",
+ "parameters": [
+ {
+ "name": "bucket",
+ "in": "path",
+ "description": "Bucket name (valid chars: a-zA-Z0-9_-)",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/models": {
"get": {
"responses": {
@@ -5525,6 +5778,105 @@
],
"title": "AgentTurnResponseTurnStartPayload"
},
+ "CreateUploadSessionRequest": {
+ "type": "object",
+ "properties": {
+ "bucket": {
+ "type": "string",
+ "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
+ },
+ "key": {
+ "type": "string",
+ "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
+ },
+ "mime_type": {
+ "type": "string",
+ "description": "MIME type of the file"
+ },
+ "size": {
+ "type": "integer",
+ "description": "File size in bytes"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bucket",
+ "key",
+ "mime_type",
+ "size"
+ ],
+ "title": "CreateUploadSessionRequest"
+ },
+ "FileUploadResponse": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "ID of the upload session"
+ },
+ "url": {
+ "type": "string",
+ "description": "Upload URL for the file or file parts"
+ },
+ "offset": {
+ "type": "integer",
+ "description": "Upload content offset"
+ },
+ "size": {
+ "type": "integer",
+ "description": "Upload content size"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "url",
+ "offset",
+ "size"
+ ],
+ "title": "FileUploadResponse",
+ "description": "Response after initiating a file upload session."
+ },
+ "FileResponse": {
+ "type": "object",
+ "properties": {
+ "bucket": {
+ "type": "string",
+ "description": "Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)"
+ },
+ "key": {
+ "type": "string",
+ "description": "Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)"
+ },
+ "mime_type": {
+ "type": "string",
+ "description": "MIME type of the file"
+ },
+ "url": {
+ "type": "string",
+ "description": "Upload URL for the file contents"
+ },
+ "bytes": {
+ "type": "integer",
+ "description": "Size of the file in bytes"
+ },
+ "created_at": {
+ "type": "integer",
+ "description": "Timestamp of when the file was created"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "bucket",
+ "key",
+ "mime_type",
+ "url",
+ "bytes",
+ "created_at"
+ ],
+ "title": "FileResponse",
+ "description": "Response representing a file entry."
+ },
"EmbeddingsRequest": {
"type": "object",
"properties": {
@@ -6840,6 +7192,37 @@
],
"title": "ToolInvocationResult"
},
+ "BucketResponse": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name"
+ ],
+ "title": "BucketResponse"
+ },
+ "ListBucketResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/BucketResponse"
+ },
+ "description": "List of FileResponse entries"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ],
+ "title": "ListBucketResponse",
+ "description": "Response representing a list of file entries."
+ },
"ListDatasetsResponse": {
"type": "object",
"properties": {
@@ -6856,6 +7239,24 @@
],
"title": "ListDatasetsResponse"
},
+ "ListFileResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/FileResponse"
+ },
+ "description": "List of FileResponse entries"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ],
+ "title": "ListFileResponse",
+ "description": "Response representing a list of file entries."
+ },
"ListModelsResponse": {
"type": "object",
"properties": {
@@ -8643,6 +9044,9 @@
{
"name": "Eval"
},
+ {
+ "name": "Files (Coming Soon)"
+ },
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@@ -8698,6 +9102,7 @@
"DatasetIO",
"Datasets",
"Eval",
+ "Files (Coming Soon)",
"Inference",
"Inspect",
"Models",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 4dfaa284a..fe94e34ff 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -406,6 +406,43 @@ paths:
schema:
$ref: '#/components/schemas/CreateAgentTurnRequest'
required: true
+ /v1/files:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListBucketResponse'
+ tags:
+ - Files (Coming Soon)
+ description: List all buckets.
+ parameters:
+      - name: bucket
+        in: query
+        description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+        required: true
+ schema:
+ type: string
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/FileUploadResponse'
+ tags:
+ - Files (Coming Soon)
+ description: >-
+ Create a new upload session for a file identified by a bucket and key.
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateUploadSessionRequest'
+ required: true
/v1/agents/{agent_id}:
delete:
responses:
@@ -468,6 +505,59 @@ paths:
required: true
schema:
type: string
+ /v1/files/{bucket}/{key}:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/FileResponse'
+ tags:
+ - Files (Coming Soon)
+      description: >-
+        Get file info identified by a bucket and key.
+ parameters:
+ - name: bucket
+ in: path
+ description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+ required: true
+ schema:
+ type: string
+ - name: key
+ in: path
+ description: >-
+ Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ required: true
+ schema:
+ type: string
+ delete:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/FileResponse'
+ tags:
+ - Files (Coming Soon)
+ description: >-
+ Delete a file identified by a bucket and key.
+ parameters:
+ - name: bucket
+ in: path
+ description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+ required: true
+ schema:
+ type: string
+ - name: key
+ in: path
+ description: >-
+ Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ required: true
+ schema:
+ type: string
/v1/inference/embeddings:
post:
responses:
@@ -875,6 +965,57 @@ paths:
- PostTraining (Coming Soon)
description: ''
parameters: []
+ /v1/files/session:{upload_id}:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/FileUploadResponse'
+ - type: 'null'
+ tags:
+ - Files (Coming Soon)
+      description: >-
+        Returns information about an existing upload session.
+ parameters:
+ - name: upload_id
+ in: path
+ description: ID of the upload session
+ required: true
+ schema:
+ type: string
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/FileResponse'
+ - type: 'null'
+ tags:
+ - Files (Coming Soon)
+      description: >-
+        Upload file content to an existing upload session. On the server, the
+        request body will contain the raw bytes that are uploaded.
+ parameters:
+ - name: upload_id
+ in: path
+ description: ID of the upload session
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/octet-stream:
+ schema:
+ type: string
+ format: binary
+ required: true
/v1/vector-dbs/{vector_db_id}:
get:
responses:
@@ -1091,6 +1232,25 @@ paths:
schema:
$ref: '#/components/schemas/RegisterDatasetRequest'
required: true
+ /v1/files/{bucket}:
+ get:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ListFileResponse'
+ tags:
+ - Files (Coming Soon)
+ description: List all files in a bucket.
+ parameters:
+ - name: bucket
+ in: path
+ description: 'Bucket name (valid chars: a-zA-Z0-9_-)'
+ required: true
+ schema:
+ type: string
/v1/models:
get:
responses:
@@ -3563,6 +3723,87 @@ components:
- event_type
- turn_id
title: AgentTurnResponseTurnStartPayload
+ CreateUploadSessionRequest:
+ type: object
+ properties:
+ bucket:
+ type: string
+ description: >-
+ Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+ key:
+ type: string
+ description: >-
+ Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ mime_type:
+ type: string
+ description: MIME type of the file
+ size:
+ type: integer
+ description: File size in bytes
+ additionalProperties: false
+ required:
+ - bucket
+ - key
+ - mime_type
+ - size
+ title: CreateUploadSessionRequest
+ FileUploadResponse:
+ type: object
+ properties:
+ id:
+ type: string
+ description: ID of the upload session
+ url:
+ type: string
+ description: Upload URL for the file or file parts
+ offset:
+ type: integer
+ description: Upload content offset
+ size:
+ type: integer
+ description: Upload content size
+ additionalProperties: false
+ required:
+ - id
+ - url
+ - offset
+ - size
+ title: FileUploadResponse
+ description: >-
+ Response after initiating a file upload session.
+ FileResponse:
+ type: object
+ properties:
+ bucket:
+ type: string
+ description: >-
+ Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+ key:
+ type: string
+ description: >-
+ Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ mime_type:
+ type: string
+ description: MIME type of the file
+ url:
+ type: string
+ description: Upload URL for the file contents
+ bytes:
+ type: integer
+ description: Size of the file in bytes
+ created_at:
+ type: integer
+ description: Timestamp of when the file was created
+ additionalProperties: false
+ required:
+ - bucket
+ - key
+ - mime_type
+ - url
+ - bytes
+ - created_at
+ title: FileResponse
+ description: Response representing a file entry.
EmbeddingsRequest:
type: object
properties:
@@ -4394,6 +4635,29 @@ components:
required:
- content
title: ToolInvocationResult
+ BucketResponse:
+ type: object
+ properties:
+ name:
+ type: string
+ additionalProperties: false
+ required:
+ - name
+ title: BucketResponse
+ ListBucketResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/BucketResponse'
+        description: List of BucketResponse entries
+ additionalProperties: false
+ required:
+ - data
+ title: ListBucketResponse
+ description: >-
+      Response representing a list of bucket entries.
ListDatasetsResponse:
type: object
properties:
@@ -4405,6 +4669,20 @@ components:
required:
- data
title: ListDatasetsResponse
+ ListFileResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/FileResponse'
+ description: List of FileResponse entries
+ additionalProperties: false
+ required:
+ - data
+ title: ListFileResponse
+ description: >-
+ Response representing a list of file entries.
ListModelsResponse:
type: object
properties:
@@ -5533,6 +5811,7 @@ tags:
- name: DatasetIO
- name: Datasets
- name: Eval
+ - name: Files (Coming Soon)
- name: Inference
description: >-
This API provides the raw interface to the underlying models. Two kinds of models
@@ -5567,6 +5846,7 @@ x-tagGroups:
- DatasetIO
- Datasets
- Eval
+ - Files (Coming Soon)
- Inference
- Inspect
- Models
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 60cd7a242..4220cfc05 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -477,6 +477,7 @@ class Generator:
"SyntheticDataGeneration",
"PostTraining",
"BatchInference",
+ "Files",
]:
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
print(op.defining_class.__name__)
@@ -520,8 +521,30 @@ class Generator:
# parameters passed anywhere
parameters = path_parameters + query_parameters
- # data passed in payload
- if op.request_params:
+ webmethod = getattr(op.func_ref, "__webmethod__", None)
+ raw_bytes_request_body = False
+ if webmethod:
+ raw_bytes_request_body = getattr(webmethod, "raw_bytes_request_body", False)
+
+        # an endpoint receiving raw bytes in the request body cannot also declare request parameters
+ if raw_bytes_request_body and op.request_params:
+ raise ValueError("Cannot have both raw bytes request body and request parameters")
+
+ # data passed in request body as raw bytes
+ if raw_bytes_request_body:
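+            # OpenAPI 3.x models a raw binary payload as a string schema with
+            # format "binary" under the application/octet-stream media type.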
+ requestBody = RequestBody(
+ content={
+ "application/octet-stream": {
+ "schema": {
+ "type": "string",
+ "format": "binary",
+ }
+ }
+ },
+ required=True,
+ )
+ # data passed in payload as JSON and mapped to request parameters
+ elif op.request_params:
builder = ContentBuilder(self.schema_builder)
first = next(iter(op.request_params))
request_name, request_type = first
diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py
index 9e5363b4a..d3e5a1f19 100644
--- a/docs/openapi_generator/pyopenapi/specification.py
+++ b/docs/openapi_generator/pyopenapi/specification.py
@@ -78,7 +78,7 @@ class MediaType:
@dataclass
class RequestBody:
- content: Dict[str, MediaType]
+ content: Dict[str, MediaType | Dict[str, Any]]
description: Optional[str] = None
required: Optional[bool] = None
diff --git a/llama_stack/apis/files/__init__.py b/llama_stack/apis/files/__init__.py
new file mode 100644
index 000000000..269baf177
--- /dev/null
+++ b/llama_stack/apis/files/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .files import * # noqa: F401 F403
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
new file mode 100644
index 000000000..f17fadc8c
--- /dev/null
+++ b/llama_stack/apis/files/files.py
@@ -0,0 +1,174 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List, Optional, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+@json_schema_type
+class FileUploadResponse(BaseModel):
+ """
+ Response after initiating a file upload session.
+
+ :param id: ID of the upload session
+ :param url: Upload URL for the file or file parts
+ :param offset: Upload content offset
+ :param size: Upload content size
+ """
+
+ id: str
+ url: str
+ offset: int
+ size: int
+
+
+@json_schema_type
+class BucketResponse(BaseModel):
+ name: str
+
+
+@json_schema_type
+class ListBucketResponse(BaseModel):
+ """
+    Response representing a list of bucket entries.
+
+    :param data: List of BucketResponse entries
+ """
+
+ data: List[BucketResponse]
+
+
+@json_schema_type
+class FileResponse(BaseModel):
+ """
+ Response representing a file entry.
+
+ :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+ :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ :param mime_type: MIME type of the file
+ :param url: Upload URL for the file contents
+ :param bytes: Size of the file in bytes
+ :param created_at: Timestamp of when the file was created
+ """
+
+ bucket: str
+ key: str
+ mime_type: str
+ url: str
+ bytes: int
+ created_at: int
+
+
+@json_schema_type
+class ListFileResponse(BaseModel):
+ """
+ Response representing a list of file entries.
+
+ :param data: List of FileResponse entries
+ """
+
+ data: List[FileResponse]
+
+
+@runtime_checkable
+@trace_protocol
+class Files(Protocol):
+ @webmethod(route="/files", method="POST")
+ async def create_upload_session(
+ self,
+ bucket: str,
+ key: str,
+ mime_type: str,
+ size: int,
+ ) -> FileUploadResponse:
+ """
+ Create a new upload session for a file identified by a bucket and key.
+
+ :param bucket: Bucket under which the file is stored (valid chars: a-zA-Z0-9_-)
+ :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ :param mime_type: MIME type of the file
+ :param size: File size in bytes
+ """
+ ...
+
+ @webmethod(route="/files/session:{upload_id}", method="POST", raw_bytes_request_body=True)
+ async def upload_content_to_session(
+ self,
+ upload_id: str,
+ ) -> Optional[FileResponse]:
+ """
+ Upload file content to an existing upload session.
+        On the server, the request body will contain the raw bytes that are uploaded.
+
+ :param upload_id: ID of the upload session
+ """
+ ...
+
+ @webmethod(route="/files/session:{upload_id}", method="GET")
+ async def get_upload_session_info(
+ self,
+ upload_id: str,
+ ) -> Optional[FileUploadResponse]:
+ """
+        Returns information about an existing upload session.
+
+ :param upload_id: ID of the upload session
+ """
+ ...
+
+ @webmethod(route="/files", method="GET")
+ async def list_all_buckets(
+ self,
+ bucket: str,
+ ) -> ListBucketResponse:
+ """
+ List all buckets.
+ """
+ ...
+
+ @webmethod(route="/files/{bucket}", method="GET")
+ async def list_files_in_bucket(
+ self,
+ bucket: str,
+ ) -> ListFileResponse:
+ """
+ List all files in a bucket.
+
+ :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+ """
+ ...
+
+ @webmethod(route="/files/{bucket}/{key:path}", method="GET")
+ async def get_file(
+ self,
+ bucket: str,
+ key: str,
+ ) -> FileResponse:
+ """
+        Get file info identified by a bucket and key.
+
+ :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+ :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ """
+ ...
+
+ @webmethod(route="/files/{bucket}/{key:path}", method="DELETE")
+ async def delete_file(
+ self,
+ bucket: str,
+ key: str,
+ ) -> FileResponse:
+ """
+ Delete a file identified by a bucket and key.
+
+ :param bucket: Bucket name (valid chars: a-zA-Z0-9_-)
+ :param key: Key under which the file is stored (valid chars: a-zA-Z0-9_-/.)
+ """
+ ...
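+
+
+# A minimal usage sketch (hypothetical client code, not part of this API;
+# assumes `files` implements the Files protocol and `data` holds the bytes
+# to upload):
+#
+#     session = await files.create_upload_session(
+#         bucket="my-bucket",
+#         key="docs/report.pdf",
+#         mime_type="application/pdf",
+#         size=len(data),
+#     )
+#     # The raw bytes travel in the body of POST /files/session:{upload_id};
+#     # per the Optional return type, None is returned until the upload is
+#     # complete.
+#     uploaded = await files.upload_content_to_session(session.id)
+#     info = await files.get_file(bucket="my-bucket", key="docs/report.pdf")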
diff --git a/llama_stack/distribution/stack.py b/llama_stack/distribution/stack.py
index 9335dc3a9..1328c88ef 100644
--- a/llama_stack/distribution/stack.py
+++ b/llama_stack/distribution/stack.py
@@ -19,6 +19,7 @@ from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval
+from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
@@ -63,6 +64,7 @@ class LlamaStack(
ToolGroups,
ToolRuntime,
RAGToolRuntime,
+ Files,
):
pass
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index c79f97def..dfd27d408 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -46,7 +46,7 @@ from llama_stack.providers.utils.inference.embedding_mixin import (
)
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
- build_model_alias,
+ build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.prompt_adapter import (
augment_content_with_response_format_prompt,
@@ -116,7 +116,7 @@ class MetaReferenceInferenceImpl(
self.model_registry_helper = ModelRegistryHelper(
[
- build_model_alias(
+ build_hf_repo_model_alias(
llama_model.descriptor(),
llama_model.core_model_id.value,
)
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/llama_stack/providers/remote/inference/bedrock/models.py
index b629e05d5..4c5248619 100644
--- a/llama_stack/providers/remote/inference/bedrock/models.py
+++ b/llama_stack/providers/remote/inference/bedrock/models.py
@@ -6,19 +6,19 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"meta.llama3-1-8b-instruct-v1:0",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta.llama3-1-70b-instruct-v1:0",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta.llama3-1-405b-instruct-v1:0",
CoreModelId.llama3_1_405b_instruct.value,
),
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index 03ffeb492..53b0d5b55 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -6,15 +6,15 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
model_aliases = [
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.1-8b",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama-3.3-70b",
CoreModelId.llama3_3_70b_instruct.value,
),
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/llama_stack/providers/remote/inference/databricks/databricks.py
index 05e61361c..03da4d129 100644
--- a/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -25,7 +25,7 @@ from llama_stack.apis.inference import (
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
- build_model_alias,
+ build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
get_sampling_options,
@@ -39,11 +39,11 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .config import DatabricksImplConfig
model_aliases = [
- build_model_alias(
+ build_hf_repo_model_alias(
"databricks-meta-llama-3-1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"databricks-meta-llama-3-1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
diff --git a/llama_stack/providers/remote/inference/fireworks/models.py b/llama_stack/providers/remote/inference/fireworks/models.py
index 14de585d4..8ba67c9ff 100644
--- a/llama_stack/providers/remote/inference/fireworks/models.py
+++ b/llama_stack/providers/remote/inference/fireworks/models.py
@@ -6,47 +6,47 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-v3p3-70b-instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-8b",
CoreModelId.llama_guard_3_8b.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"accounts/fireworks/models/llama-guard-3-11b-vision",
CoreModelId.llama_guard_3_11b_vision.value,
),
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index 441b6af5c..12ee613fe 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -31,8 +31,8 @@ from llama_stack.models.llama.sku_list import CoreModelId
from llama_stack.providers.remote.inference.groq.config import GroqConfig
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
+ build_hf_repo_model_alias,
build_model_alias,
- build_model_alias_with_just_provider_model_id,
)
from .groq_utils import (
@@ -42,19 +42,19 @@ from .groq_utils import (
)
_MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3-8b-8192",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama-3.1-8b-instant",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3-70b-8192",
CoreModelId.llama3_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama-3.3-70b-versatile",
CoreModelId.llama3_3_70b_instruct.value,
),
@@ -62,7 +62,7 @@ _MODEL_ALIASES = [
# Preview models aren't recommended for production use, but we include this one
# to pass the test fixture
# TODO(aidand): Replace this with a stable model once Groq supports it
- build_model_alias(
+ build_hf_repo_model_alias(
"llama-3.2-3b-preview",
CoreModelId.llama3_2_3b_instruct.value,
),
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index 1d9b575d4..6a359e009 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
_MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama3-8b-instruct",
CoreModelId.llama3_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama3-70b-instruct",
CoreModelId.llama3_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.1-8b-instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.1-70b-instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.1-405b-instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.2-1b-instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.2-3b-instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.2-11b-vision-instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta/llama-3.2-90b-vision-instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py
index 2488d9071..287f025e0 100644
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -35,8 +35,8 @@ from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
+ build_hf_repo_model_alias,
build_model_alias,
- build_model_alias_with_just_provider_model_id,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@@ -59,73 +59,73 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
log = logging.getLogger(__name__)
model_aliases = [
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.1:8b-instruct-fp16",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.1:8b",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.1:70b-instruct-fp16",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.1:70b",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.1:405b-instruct-fp16",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.1:405b",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.2:1b-instruct-fp16",
CoreModelId.llama3_2_1b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.2:1b",
CoreModelId.llama3_2_1b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.2:3b-instruct-fp16",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.2:3b",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.2-vision:11b-instruct-fp16",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.2-vision:latest",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.2-vision:90b-instruct-fp16",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
- build_model_alias_with_just_provider_model_id(
+ build_model_alias(
"llama3.2-vision:90b",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama3.3:70b",
CoreModelId.llama3_3_70b_instruct.value,
),
# The Llama Guard models don't have their full fp16 versions
# so we are going to alias their default version to the canonical SKU
- build_model_alias(
+ build_hf_repo_model_alias(
"llama-guard3:8b",
CoreModelId.llama_guard_3_8b.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"llama-guard3:1b",
CoreModelId.llama_guard_3_1b.value,
),
diff --git a/llama_stack/providers/remote/inference/sambanova/models.py b/llama_stack/providers/remote/inference/sambanova/models.py
index 27a4a149e..1e002c81d 100644
--- a/llama_stack/providers/remote/inference/sambanova/models.py
+++ b/llama_stack/providers/remote/inference/sambanova/models.py
@@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.1-8B-Instruct",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.1-70B-Instruct",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.1-405B-Instruct",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.2-1B-Instruct",
CoreModelId.llama3_2_1b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.2-3B-Instruct",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-3.3-70B-Instruct",
CoreModelId.llama3_3_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Llama-3.2-11B-Vision-Instruct",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Llama-3.2-90B-Vision-Instruct",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/llama_stack/providers/remote/inference/tgi/tgi.py
index 7ffeced95..cd2311a48 100644
--- a/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -32,7 +32,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
- build_model_alias,
+ build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionChoice,
@@ -53,9 +53,9 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl
log = logging.getLogger(__name__)
-def build_model_aliases():
+def build_hf_repo_model_aliases():
return [
- build_model_alias(
+ build_hf_repo_model_alias(
model.huggingface_repo,
model.descriptor(),
)
@@ -70,7 +70,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
model_id: str
def __init__(self) -> None:
- self.register_helper = ModelRegistryHelper(build_model_aliases())
+ self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.huggingface_repo_to_llama_model_id = {
model.huggingface_repo: model.descriptor() for model in all_registered_models() if model.huggingface_repo
}
diff --git a/llama_stack/providers/remote/inference/together/models.py b/llama_stack/providers/remote/inference/together/models.py
index 87d282ea5..87904c47b 100644
--- a/llama_stack/providers/remote/inference/together/models.py
+++ b/llama_stack/providers/remote/inference/together/models.py
@@ -6,43 +6,43 @@
from llama_stack.models.llama.datatypes import CoreModelId
from llama_stack.providers.utils.inference.model_registry import (
- build_model_alias,
+ build_hf_repo_model_alias,
)
MODEL_ALIASES = [
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
CoreModelId.llama3_1_8b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
CoreModelId.llama3_1_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
CoreModelId.llama3_1_405b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Llama-3.2-3B-Instruct-Turbo",
CoreModelId.llama3_2_3b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_11b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",
CoreModelId.llama3_2_90b_vision_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
CoreModelId.llama3_3_70b_instruct.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Meta-Llama-Guard-3-8B",
CoreModelId.llama_guard_3_8b.value,
),
- build_model_alias(
+ build_hf_repo_model_alias(
"meta-llama/Llama-Guard-3-11B-Vision-Turbo",
CoreModelId.llama_guard_3_11b_vision.value,
),
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 220bf4bde..75dc432e4 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -38,7 +38,7 @@ from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import ModelsProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import (
ModelRegistryHelper,
- build_model_alias,
+ build_hf_repo_model_alias,
)
from llama_stack.providers.utils.inference.openai_compat import (
OpenAICompatCompletionResponse,
@@ -62,9 +62,9 @@ from .config import VLLMInferenceAdapterConfig
log = logging.getLogger(__name__)
-def build_model_aliases():
+def build_hf_repo_model_aliases():
return [
- build_model_alias(
+ build_hf_repo_model_alias(
model.huggingface_repo,
model.descriptor(),
)
@@ -204,7 +204,7 @@ async def _process_vllm_chat_completion_stream_response(
class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
- self.register_helper = ModelRegistryHelper(build_model_aliases())
+ self.register_helper = ModelRegistryHelper(build_hf_repo_model_aliases())
self.config = config
self.client = None
diff --git a/llama_stack/providers/tests/inference/fixtures.py b/llama_stack/providers/tests/inference/fixtures.py
index 2a782befc..ec4e094c9 100644
--- a/llama_stack/providers/tests/inference/fixtures.py
+++ b/llama_stack/providers/tests/inference/fixtures.py
@@ -83,17 +83,13 @@ def inference_cerebras() -> ProviderFixture:
@pytest.fixture(scope="session")
-def inference_ollama(inference_model) -> ProviderFixture:
- inference_model = [inference_model] if isinstance(inference_model, str) else inference_model
- if inference_model and "Llama3.1-8B-Instruct" in inference_model:
- pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
-
+def inference_ollama() -> ProviderFixture:
return ProviderFixture(
providers=[
Provider(
provider_id="ollama",
provider_type="remote::ollama",
- config=OllamaImplConfig(host="localhost", port=os.getenv("OLLAMA_PORT", 11434)).model_dump(),
+ config=OllamaImplConfig(url=get_env_or_fail("OLLAMA_URL")).model_dump(),
)
],
)
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py
index c5f6cd6b5..e14a733d1 100644
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from collections import namedtuple
from typing import List, Optional
+from pydantic import BaseModel, Field
+
from llama_stack.apis.models.models import ModelType
from llama_stack.models.llama.sku_list import all_registered_models
from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate
@@ -14,7 +15,14 @@ from llama_stack.providers.utils.inference import (
ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR,
)
-ModelAlias = namedtuple("ModelAlias", ["provider_model_id", "aliases", "llama_model"])
+
+# TODO: this class is more confusing than useful right now. We need to make it
+# closer to the Model class.
+class ModelAlias(BaseModel):
+ provider_model_id: str
+ aliases: List[str] = Field(default_factory=list)
+ llama_model: Optional[str] = None
+ model_type: ModelType = ModelType.llm
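+    # Usage note (inferred from the builders below): build_hf_repo_model_alias()
+    # also registers the model's HuggingFace repo as an alias, while
+    # build_model_alias() (formerly build_model_alias_with_just_provider_model_id)
+    # registers only the provider model id, with no extra aliases.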
def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
@@ -24,7 +32,7 @@ def get_huggingface_repo(model_descriptor: str) -> Optional[str]:
return None
-def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
+def build_hf_repo_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias(
provider_model_id=provider_model_id,
aliases=[
@@ -34,7 +42,7 @@ def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAli
)
-def build_model_alias_with_just_provider_model_id(provider_model_id: str, model_descriptor: str) -> ModelAlias:
+def build_model_alias(provider_model_id: str, model_descriptor: str) -> ModelAlias:
return ModelAlias(
provider_model_id=provider_model_id,
aliases=[],
diff --git a/llama_stack/schema_utils.py b/llama_stack/schema_utils.py
index 56b9e5e4c..581404844 100644
--- a/llama_stack/schema_utils.py
+++ b/llama_stack/schema_utils.py
@@ -19,6 +19,7 @@ class WebMethod:
request_examples: Optional[List[Any]] = None
response_examples: Optional[List[Any]] = None
method: Optional[str] = None
+ raw_bytes_request_body: Optional[bool] = False
def webmethod(
@@ -27,6 +28,7 @@ def webmethod(
public: Optional[bool] = False,
request_examples: Optional[List[Any]] = None,
response_examples: Optional[List[Any]] = None,
+ raw_bytes_request_body: Optional[bool] = False,
) -> Callable[[T], T]:
"""
Decorator that supplies additional metadata to an endpoint operation function.
@@ -44,6 +46,7 @@ def webmethod(
public=public or False,
request_examples=request_examples,
response_examples=response_examples,
+ raw_bytes_request_body=raw_bytes_request_body,
)
return cls