From 4610c29d1e059f3b875c9658ad06758355219df9 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Tue, 2 Dec 2025 17:56:59 +0000 Subject: [PATCH] fix(files): Enforce DELETE action permission for file deletion (#4275) Previously, file deletion only checked READ permission via the _lookup_file_id() method. This meant any user with READ access to a file could also delete it, making it impossible to configure read-only file access. This change adds an 'action' parameter to fetch_all() and fetch_one() in AuthorizedSqlStore, defaulting to Action.READ for backward compatibility. The openai_delete_file() method now passes Action.DELETE, ensuring proper RBAC enforcement. With this fix, access policies can now distinguish between Users who can read/list files but not delete them Closes: #4274 Signed-off-by: Derek Higgins (cherry picked from commit 4ff0c25c5260e82774bf3c5f890dd50d0ad02824) # Conflicts: # llama_stack/providers/inline/files/localfs/files.py # llama_stack/providers/remote/files/s3/files.py # src/llama_stack/providers/remote/files/openai/files.py --- .../providers/inline/files/localfs/files.py | 17 +- .../providers/remote/files/s3/files.py | 21 +- .../utils/sqlstore/authorized_sqlstore.py | 5 +- .../providers/remote/files/openai/files.py | 242 ++++++++++++++++++ 4 files changed, 278 insertions(+), 7 deletions(-) create mode 100644 src/llama_stack/providers/remote/files/openai/files.py diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py index a76b982ce..a9170805b 100644 --- a/llama_stack/providers/inline/files/localfs/files.py +++ b/llama_stack/providers/inline/files/localfs/files.py @@ -11,9 +11,20 @@ from typing import Annotated from fastapi import Depends, File, Form, Response, UploadFile +<<<<<<< HEAD:llama_stack/providers/inline/files/localfs/files.py from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import ( +======= +from llama_stack.core.access_control.datatypes import Action +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.log import get_logger +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( +>>>>>>> 4ff0c25c (fix(files): Enforce DELETE action permission for file deletion (#4275)):src/llama_stack/providers/inline/files/localfs/files.py ExpiresAfter, Files, ListOpenAIFileResponse, @@ -72,12 +83,12 @@ class LocalfsFilesImpl(Files): """Get the filesystem path for a file ID.""" return Path(self.config.storage_dir) / file_id - async def _lookup_file_id(self, file_id: str) -> tuple[OpenAIFileObject, Path]: + async def _lookup_file_id(self, file_id: str, action: Action = Action.READ) -> tuple[OpenAIFileObject, Path]: """Look up a OpenAIFileObject and filesystem path from its ID.""" if not self.sql_store: raise RuntimeError("Files provider not initialized") - row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}) + row = await self.sql_store.fetch_one("openai_files", where={"id": file_id}, action=action) if not row: raise ResourceNotFoundError(file_id, "File", "client.files.list()") @@ -188,7 +199,7 @@ class LocalfsFilesImpl(Files): async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: """Delete a file.""" # Delete physical file - _, file_path = await self._lookup_file_id(file_id) + _, file_path = await self._lookup_file_id(file_id, action=Action.DELETE) if file_path.exists(): file_path.unlink() diff --git a/llama_stack/providers/remote/files/s3/files.py b/llama_stack/providers/remote/files/s3/files.py index c0e9f81d6..f241cada0 100644 --- a/llama_stack/providers/remote/files/s3/files.py +++ b/llama_stack/providers/remote/files/s3/files.py @@ -12,9 +12,22 @@ import boto3 from botocore.exceptions import BotoCoreError, ClientError, NoCredentialsError from fastapi import Depends, File, Form, Response, UploadFile +<<<<<<< HEAD:llama_stack/providers/remote/files/s3/files.py from llama_stack.apis.common.errors import ResourceNotFoundError from llama_stack.apis.common.responses import Order from llama_stack.apis.files import ( +======= +if TYPE_CHECKING: + from mypy_boto3_s3.client import S3Client + +from llama_stack.core.access_control.datatypes import Action +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.id_generation import generate_object_id +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( +>>>>>>> 4ff0c25c (fix(files): Enforce DELETE action permission for file deletion (#4275)):src/llama_stack/providers/remote/files/s3/files.py ExpiresAfter, Files, ListOpenAIFileResponse, @@ -135,11 +148,13 @@ class S3FilesImpl(Files): """Return current UTC timestamp as int seconds.""" return int(datetime.now(UTC).timestamp()) - async def _get_file(self, file_id: str, return_expired: bool = False) -> dict[str, Any]: + async def _get_file( + self, file_id: str, return_expired: bool = False, action: Action = Action.READ + ) -> dict[str, Any]: where: dict[str, str | dict] = {"id": file_id} if not return_expired: where["expires_at"] = {">": self._now()} - if not (row := await self.sql_store.fetch_one("openai_files", where=where)): + if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)): raise ResourceNotFoundError(file_id, "File", "files.list()") return row @@ -284,7 +299,7 @@ class S3FilesImpl(Files): async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: await self._delete_if_expired(file_id) - _ = await self._get_file(file_id) # raises if not found + _ = await self._get_file(file_id, action=Action.DELETE) # raises if not found await self._delete_file(file_id) return OpenAIFileDeleteResponse(id=file_id, deleted=True) diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index eb2d9a491..73a8ffbc4 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -136,6 +136,7 @@ class AuthorizedSqlStore: limit: int | None = None, order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, cursor: tuple[str, str] | None = None, + action: Action = Action.READ, ) -> PaginatedResponse: """Fetch all rows with automatic access control filtering.""" access_where = self._build_access_control_where_clause(self.policy) @@ -160,7 +161,7 @@ class AuthorizedSqlStore: str(record_id), table, User(principal=stored_owner_principal, attributes=stored_access_attrs) ) - if is_action_allowed(self.policy, Action.READ, sql_record, current_user): + if is_action_allowed(self.policy, action, sql_record, current_user): filtered_rows.append(row) return PaginatedResponse( @@ -173,6 +174,7 @@ class AuthorizedSqlStore: table: str, where: Mapping[str, Any] | None = None, order_by: list[tuple[str, Literal["asc", "desc"]]] | None = None, + action: Action = Action.READ, ) -> dict[str, Any] | None: """Fetch one row with automatic access control checking.""" results = await self.fetch_all( @@ -180,6 +182,7 @@ class AuthorizedSqlStore: where=where, limit=1, order_by=order_by, + action=action, ) return results.data[0] if results.data else None diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py new file mode 100644 index 000000000..9808b5402 --- /dev/null +++ b/src/llama_stack/providers/remote/files/openai/files.py @@ -0,0 +1,242 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from datetime import UTC, datetime +from typing import Annotated, Any + +from fastapi import Depends, File, Form, Response, UploadFile + +from llama_stack.core.access_control.datatypes import Action +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.sqlstore.authorized_sqlstore import AuthorizedSqlStore +from llama_stack.core.storage.sqlstore.sqlstore import sqlstore_impl +from llama_stack.providers.utils.files.form_data import parse_expires_after +from llama_stack_api import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, + Order, + ResourceNotFoundError, +) +from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType +from openai import OpenAI + +from .config import OpenAIFilesImplConfig + + +def _make_file_object( + *, + id: str, + filename: str, + purpose: str, + bytes: int, + created_at: int, + expires_at: int, + **kwargs: Any, +) -> OpenAIFileObject: + """ + Construct an OpenAIFileObject and normalize expires_at. + + If expires_at is greater than the max we treat it as no-expiration and + return None for expires_at. + """ + obj = OpenAIFileObject( + id=id, + filename=filename, + purpose=OpenAIFilePurpose(purpose), + bytes=bytes, + created_at=created_at, + expires_at=expires_at, + ) + + if obj.expires_at is not None and obj.expires_at > (obj.created_at + ExpiresAfter.MAX): + obj.expires_at = None # type: ignore + + return obj + + +class OpenAIFilesImpl(Files): + """OpenAI Files API implementation.""" + + def __init__(self, config: OpenAIFilesImplConfig, policy: list[AccessRule]) -> None: + self._config = config + self.policy = policy + self._client: OpenAI | None = None + self._sql_store: AuthorizedSqlStore | None = None + + def _now(self) -> int: + """Return current UTC timestamp as int seconds.""" + return int(datetime.now(UTC).timestamp()) + + async def _get_file( + self, file_id: str, return_expired: bool = False, action: Action = Action.READ + ) -> dict[str, Any]: + where: dict[str, str | dict] = {"id": file_id} + if not return_expired: + where["expires_at"] = {">": self._now()} + if not (row := await self.sql_store.fetch_one("openai_files", where=where, action=action)): + raise ResourceNotFoundError(file_id, "File", "files.list()") + return row + + async def _delete_file(self, file_id: str) -> None: + """Delete a file from OpenAI and the database.""" + try: + self.client.files.delete(file_id) + except Exception as e: + # If file doesn't exist on OpenAI side, just remove from metadata store + if "not found" not in str(e).lower(): + raise RuntimeError(f"Failed to delete file from OpenAI: {e}") from e + + await self.sql_store.delete("openai_files", where={"id": file_id}) + + async def _delete_if_expired(self, file_id: str) -> None: + """If the file exists and is expired, delete it.""" + if row := await self._get_file(file_id, return_expired=True): + if (expires_at := row.get("expires_at")) and expires_at <= self._now(): + await self._delete_file(file_id) + + async def initialize(self) -> None: + self._client = OpenAI(api_key=self._config.api_key) + + self._sql_store = AuthorizedSqlStore(sqlstore_impl(self._config.metadata_store), self.policy) + await self._sql_store.create_table( + "openai_files", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "filename": ColumnType.STRING, + "purpose": ColumnType.STRING, + "bytes": ColumnType.INTEGER, + "created_at": ColumnType.INTEGER, + "expires_at": ColumnType.INTEGER, + }, + ) + + async def shutdown(self) -> None: + pass + + @property + def client(self) -> OpenAI: + assert self._client is not None, "Provider not initialized" + return self._client + + @property + def sql_store(self) -> AuthorizedSqlStore: + assert self._sql_store is not None, "Provider not initialized" + return self._sql_store + + async def openai_upload_file( + self, + file: Annotated[UploadFile, File()], + purpose: Annotated[OpenAIFilePurpose, Form()], + expires_after: Annotated[ExpiresAfter | None, Depends(parse_expires_after)] = None, + ) -> OpenAIFileObject: + filename = getattr(file, "filename", None) or "uploaded_file" + content = await file.read() + file_size = len(content) + + created_at = self._now() + + expires_at = created_at + ExpiresAfter.MAX * 42 + if purpose == OpenAIFilePurpose.BATCH: + expires_at = created_at + ExpiresAfter.MAX + + if expires_after is not None: + expires_at = created_at + expires_after.seconds + + try: + from io import BytesIO + + file_obj = BytesIO(content) + file_obj.name = filename + + response = self.client.files.create( + file=file_obj, + purpose=purpose.value, + ) + + file_id = response.id + + entry: dict[str, Any] = { + "id": file_id, + "filename": filename, + "purpose": purpose.value, + "bytes": file_size, + "created_at": created_at, + "expires_at": expires_at, + } + + await self.sql_store.insert("openai_files", entry) + + return _make_file_object(**entry) + + except Exception as e: + raise RuntimeError(f"Failed to upload file to OpenAI: {e}") from e + + async def openai_list_files( + self, + after: str | None = None, + limit: int | None = 10000, + order: Order | None = Order.desc, + purpose: OpenAIFilePurpose | None = None, + ) -> ListOpenAIFileResponse: + if not order: + order = Order.desc + + where_conditions: dict[str, Any] = {"expires_at": {">": self._now()}} + if purpose: + where_conditions["purpose"] = purpose.value + + paginated_result = await self.sql_store.fetch_all( + table="openai_files", + where=where_conditions, + order_by=[("created_at", order.value)], + cursor=("id", after) if after else None, + limit=limit, + ) + + files = [_make_file_object(**row) for row in paginated_result.data] + + return ListOpenAIFileResponse( + data=files, + has_more=paginated_result.has_more, + first_id=files[0].id if files else "", + last_id=files[-1].id if files else "", + ) + + async def openai_retrieve_file(self, file_id: str) -> OpenAIFileObject: + await self._delete_if_expired(file_id) + row = await self._get_file(file_id) + return _make_file_object(**row) + + async def openai_delete_file(self, file_id: str) -> OpenAIFileDeleteResponse: + await self._delete_if_expired(file_id) + _ = await self._get_file(file_id, action=Action.DELETE) + await self._delete_file(file_id) + return OpenAIFileDeleteResponse(id=file_id, deleted=True) + + async def openai_retrieve_file_content(self, file_id: str) -> Response: + await self._delete_if_expired(file_id) + + row = await self._get_file(file_id) + + try: + response = self.client.files.content(file_id) + file_content = response.content + + except Exception as e: + if "not found" in str(e).lower(): + await self._delete_file(file_id) + raise ResourceNotFoundError(file_id, "File", "files.list()") from e + raise RuntimeError(f"Failed to download file from OpenAI: {e}") from e + + return Response( + content=file_content, + media_type="application/octet-stream", + headers={"Content-Disposition": f'attachment; filename="{row["filename"]}"'}, + )