The first draft of the Preprocessing API.

2025-12-31 08:30:01 +00:00 · 2025-03-03 13:32:17 +01:00 · 2025-03-03 13:32:17 +01:00 · aa1b670d5c
commit aa1b670d5c
parent 7f9b767277
18 changed files with 327 additions and 0 deletions
--- a/llama_stack/apis/preprocessing/__init__.py
+++ b/llama_stack/apis/preprocessing/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .preprocessing import *  # noqa: F401 F403
--- a/llama_stack/apis/preprocessing/preprocessing.py
+++ b/llama_stack/apis/preprocessing/preprocessing.py
@@ -0,0 +1,54 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from enum import Enum
+from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack.apis.common.content_types import URL
+from llama_stack.apis.preprocessing.preprocessors import Preprocessor
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+class PreprocessingInputType(Enum):
+    # Tag type used by `PreprocessingInput.preprocessor_input_type`.
+    # NOTE(review): judging by the member names, this says whether
+    # `path_or_content` holds the document itself or a location to read it
+    # from -- confirm with the provider implementations.
+    document_content = "document_content"
+    document_path = "document_path"
+
+
+@json_schema_type
+class PreprocessingInput(BaseModel):
+    # One item of the `preprocessor_inputs` list taken by
+    # `Preprocessing.preprocess`.
+
+    # Identifier string for this input item.
+    preprocessor_input_id: str
+    # Optional tag describing the input (see PreprocessingInputType).
+    # The explicit `= None` makes the field genuinely omittable: pydantic v2
+    # treats `Optional[X]` without a default as required-but-nullable, while
+    # pydantic v1 already defaulted it to None, so this is a no-op there.
+    preprocessor_input_type: Optional[PreprocessingInputType] = None
+    # Either a plain string or a URL object.
+    # NOTE(review): presumably read according to `preprocessor_input_type`
+    # (a path vs. inline content) -- confirm.
+    path_or_content: str | URL
+
+
+# Free-form key/value options; this is the type of the `options` argument of
+# `Preprocessing.preprocess`.
+PreprocessorOptions = Dict[str, Any]
+
+# A single preprocessing result. For now it is only an alias of `str`.
+# TODO: shouldn't be just a string
+PreprocessingResult = str
+
+
+@json_schema_type
+class PreprocessingResponse(BaseModel):
+    # Return type of `Preprocessing.preprocess`.
+
+    # NOTE(review): presumably True when preprocessing succeeded -- confirm.
+    status: bool
+    # Result items, if any. `PreprocessingResult` is currently an alias of
+    # `str`, so the union is effectively `str` today.
+    # The explicit `= None` lets a response be built without results:
+    # pydantic v2 treats `Optional[X]` without a default as
+    # required-but-nullable, while pydantic v1 already defaulted it to None.
+    results: Optional[List[str | PreprocessingResult]] = None
+
+
+class PreprocessorStore(Protocol):
+    # Structural interface for looking up a Preprocessor by its id.
+    # The `Preprocessing` protocol declares a `preprocessor_store` attribute
+    # of this type. Note that this lookup is a plain (non-async) method.
+    def get_preprocessor(self, preprocessor_id: str) -> Preprocessor: ...
+
+
+@runtime_checkable
+class Preprocessing(Protocol):
+    # Protocol for running a preprocessor over a list of inputs.
+
+    # Store used to look up Preprocessor objects (see PreprocessorStore).
+    preprocessor_store: PreprocessorStore
+
+    # POST /preprocess
+    #   preprocessor_id:     identifies which preprocessor to use
+    #                        (presumably resolved through `preprocessor_store`
+    #                        -- confirm).
+    #   preprocessor_inputs: the PreprocessingInput items to handle.
+    #   options:             free-form Dict[str, Any]; it has no default, so
+    #                        callers must always pass it.
+    # Returns a PreprocessingResponse.
+    @webmethod(route="/preprocess", method="POST")
+    async def preprocess(
+        self,
+        preprocessor_id: str,
+        preprocessor_inputs: List[PreprocessingInput],
+        options: PreprocessorOptions,
+    ) -> PreprocessingResponse: ...
--- a/llama_stack/apis/preprocessing/preprocessors.py
+++ b/llama_stack/apis/preprocessing/preprocessors.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
+
+from pydantic import BaseModel
+
+from llama_stack.apis.resource import Resource, ResourceType
+from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+@json_schema_type
+class Preprocessor(Resource):
+    # Resource model describing a preprocessor.
+
+    # Fixed to the value of ResourceType.preprocessor.
+    type: Literal[ResourceType.preprocessor.value] = ResourceType.preprocessor.value
+
+    # Read-only alias for `self.identifier` (not defined in this class;
+    # presumably inherited from Resource).
+    @property
+    def preprocessor_id(self) -> str:
+        return self.identifier
+
+    # Read-only alias for `self.provider_resource_id` (not defined in this
+    # class; presumably inherited from Resource).
+    @property
+    def provider_preprocessor_id(self) -> str:
+        return self.provider_resource_id
+
+    # Optional free-form metadata; defaults to None.
+    metadata: Optional[Dict[str, Any]] = None
+
+
+class PreprocessorInput(BaseModel):
+    # Carries the same three values that `Preprocessors.register_preprocessor`
+    # accepts as arguments.
+    # NOTE(review): presumably used to declare preprocessors up front (e.g. in
+    # configuration) -- confirm where this model is consumed.
+
+    # Id of the preprocessor; the only required field.
+    preprocessor_id: str
+    # Optional provider id; defaults to None.
+    provider_id: Optional[str] = None
+    # Optional free-form metadata; defaults to None.
+    metadata: Optional[Dict[str, Any]] = None
+
+
+class ListPreprocessorsResponse(BaseModel):
+    # Return type of `Preprocessors.list_preprocessors`.
+
+    # The listed Preprocessor objects.
+    data: List[Preprocessor]
+
+
+@runtime_checkable
+@trace_protocol
+class Preprocessors(Protocol):
+    # Protocol for managing Preprocessor resources: list, get, register and
+    # unregister. Every method carries a `webmethod` route/verb declaration.
+
+    # GET /preprocessors
+    # Returns a ListPreprocessorsResponse.
+    @webmethod(route="/preprocessors", method="GET")
+    async def list_preprocessors(self) -> ListPreprocessorsResponse: ...
+
+    # GET /preprocessors/{preprocessor_id:path}
+    # Returns the matching Preprocessor, or None (the return type is Optional;
+    # presumably None means the id is unknown -- confirm).
+    # NOTE(review): the `:path` suffix presumably lets the id contain "/"
+    # characters -- confirm against the routing layer.
+    @webmethod(route="/preprocessors/{preprocessor_id:path}", method="GET")
+    async def get_preprocessor(
+        self,
+        preprocessor_id: str,
+    ) -> Optional[Preprocessor]: ...
+
+    # POST /preprocessors
+    # Only `preprocessor_id` is required; `provider_id` and `metadata` default
+    # to None. Returns a Preprocessor.
+    @webmethod(route="/preprocessors", method="POST")
+    async def register_preprocessor(
+        self,
+        preprocessor_id: str,
+        provider_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> Preprocessor: ...
+
+    # DELETE /preprocessors/{preprocessor_id:path}
+    # Takes the id of the preprocessor to unregister; returns nothing.
+    @webmethod(route="/preprocessors/{preprocessor_id:path}", method="DELETE")
+    async def unregister_preprocessor(
+        self,
+        preprocessor_id: str,
+    ) -> None: ...