The first draft of the Preprocessing API.

This commit is contained in:
ilya-kolchinsky 2025-03-03 13:32:17 +01:00
parent 7f9b767277
commit aa1b670d5c
18 changed files with 327 additions and 0 deletions

View file

@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .preprocessing import * # noqa: F401 F403

View file

@ -0,0 +1,54 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.preprocessing.preprocessors import Preprocessor
from llama_stack.schema_utils import json_schema_type, webmethod
class PreprocessingInputType(Enum):
    """Tags describing what kind of payload a preprocessing input carries.

    Each member's value is identical to its name.
    """

    # NOTE(review): names suggest "inline document content" vs. "a path to
    # the document" -- confirm the exact semantics with the providers.
    document_content = "document_content"
    document_path = "document_path"
@json_schema_type
class PreprocessingInput(BaseModel):
    """A single item handed to a preprocessor.

    The input type is optional: when omitted it defaults to ``None``.
    """

    # Identifier of this input.
    preprocessor_input_id: str
    # Explicit ``None`` default: without it pydantic v2 treats an
    # ``Optional[...]`` field as required, forcing callers to pass ``None``.
    preprocessor_input_type: Optional[PreprocessingInputType] = None
    # Either a plain string or a URL object.
    # NOTE(review): presumably the string is a path or the raw content,
    # depending on the input type -- confirm.
    path_or_content: str | URL
# Result produced for one input.
# TODO: replace the plain string with a richer result type.
PreprocessingResult = str

# Free-form options mapping: string keys, arbitrary values.
PreprocessorOptions = Dict[str, Any]
@json_schema_type
class PreprocessingResponse(BaseModel):
    """Outcome of a preprocessing request.

    ``results`` is optional and defaults to ``None``, so a response can be
    built from ``status`` alone.
    """

    # Boolean status flag.
    # NOTE(review): presumably True means success -- confirm.
    status: bool
    # Explicit ``None`` default: without it pydantic v2 treats an
    # ``Optional[...]`` field as required.
    results: Optional[List[str | PreprocessingResult]] = None
class PreprocessorStore(Protocol):
    """Structural interface for objects that can look up a preprocessor."""

    def get_preprocessor(self, preprocessor_id: str) -> Preprocessor:
        """Return the ``Preprocessor`` associated with ``preprocessor_id``."""
        ...
@runtime_checkable
class Preprocessing(Protocol):
    """Protocol of the preprocessing API.

    Exposes one web method, ``POST /preprocess``.
    """

    # Store used to look up preprocessors by id.
    preprocessor_store: PreprocessorStore

    @webmethod(route="/preprocess", method="POST")
    async def preprocess(
        self,
        preprocessor_id: str,
        preprocessor_inputs: List[PreprocessingInput],
        options: PreprocessorOptions,
    ) -> PreprocessingResponse:
        """Preprocess a list of inputs with the given preprocessor.

        :param preprocessor_id: Identifier of the preprocessor to use.
        :param preprocessor_inputs: The inputs to preprocess.
        :param options: Options mapping for the preprocessor.
        :returns: A PreprocessingResponse.
        """
        ...

View file

@ -0,0 +1,65 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class Preprocessor(Resource):
    """Resource describing a preprocessor."""

    # Discriminator fixed to the preprocessor resource type.
    type: Literal[ResourceType.preprocessor.value] = ResourceType.preprocessor.value
    # Optional free-form metadata; None when not supplied.
    metadata: Optional[Dict[str, Any]] = None

    @property
    def preprocessor_id(self) -> str:
        """Alias for ``self.identifier``."""
        return self.identifier

    @property
    def provider_preprocessor_id(self) -> str:
        """Alias for ``self.provider_resource_id``."""
        return self.provider_resource_id
class PreprocessorInput(BaseModel):
    """Model carrying a preprocessor id plus optional provider id and metadata."""

    # Identifier of the preprocessor (required).
    preprocessor_id: str
    # Optional provider identifier; None when not supplied.
    provider_id: Optional[str] = None
    # Optional free-form metadata; None when not supplied.
    metadata: Optional[Dict[str, Any]] = None
class ListPreprocessorsResponse(BaseModel):
    """Response wrapper holding a list of preprocessors."""

    # The preprocessors being returned.
    data: List[Preprocessor]
@runtime_checkable
@trace_protocol
class Preprocessors(Protocol):
    """Protocol of the preprocessor registry API.

    Exposes list, get, register and unregister web methods under
    ``/preprocessors``.
    """

    @webmethod(route="/preprocessors", method="GET")
    async def list_preprocessors(self) -> ListPreprocessorsResponse:
        """List preprocessors.

        :returns: A ListPreprocessorsResponse.
        """
        ...

    @webmethod(route="/preprocessors/{preprocessor_id:path}", method="GET")
    async def get_preprocessor(self, preprocessor_id: str) -> Optional[Preprocessor]:
        """Get a preprocessor by its identifier.

        :param preprocessor_id: Identifier of the preprocessor.
        :returns: A Preprocessor, or None.
        """
        ...

    @webmethod(route="/preprocessors", method="POST")
    async def register_preprocessor(
        self,
        preprocessor_id: str,
        provider_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Preprocessor:
        """Register a preprocessor.

        :param preprocessor_id: Identifier of the preprocessor.
        :param provider_id: Optional provider identifier.
        :param metadata: Optional metadata mapping.
        :returns: A Preprocessor.
        """
        ...

    @webmethod(route="/preprocessors/{preprocessor_id:path}", method="DELETE")
    async def unregister_preprocessor(self, preprocessor_id: str) -> None:
        """Unregister a preprocessor.

        :param preprocessor_id: Identifier of the preprocessor.
        """
        ...