diff --git a/src/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py
index ae01c5dfc..f12cf66a2 100644
--- a/src/llama_stack/apis/datatypes.py
+++ b/src/llama_stack/apis/datatypes.py
@@ -82,6 +82,7 @@ class DynamicApiMeta(EnumMeta):
 @json_schema_type
 class Api(Enum, metaclass=DynamicApiMeta):
     """Enumeration of all available APIs in the Llama Stack system.
+    :cvar file_processors: File content extraction and processing
     :cvar providers: Provider management and configuration
     :cvar inference: Text generation, chat completions, and embeddings
     :cvar safety: Content moderation and safety shields
@@ -104,7 +105,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
     :cvar prompts: Prompt versions and management
     :cvar inspect: Built-in system inspection and introspection
     """
-
+    file_processors = "file_processors"
     providers = "providers"
     inference = "inference"
     safety = "safety"
diff --git a/src/llama_stack/apis/file_processors/file_processors.py b/src/llama_stack/apis/file_processors/file_processors.py
new file mode 100644
index 000000000..53e2408e6
--- /dev/null
+++ b/src/llama_stack/apis/file_processors/file_processors.py
@@ -0,0 +1,50 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Protocol, runtime_checkable
+
+from pydantic import BaseModel, Field
+
+from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.core.telemetry.trace_protocol import trace_protocol
+from llama_stack.schema_utils import json_schema_type, webmethod
+
+
+@json_schema_type
+class ProcessedContent(BaseModel):
+    """Result of file processing containing extracted content and metadata.
+
+    :param content: Extracted text content from the file
+    :param metadata: Processing metadata including processor info, timing, etc.
+    """
+
+    content: str = Field(..., description="Extracted text content from the file")
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Processing metadata")
+
+
+@runtime_checkable
+@trace_protocol
+class FileProcessors(Protocol):
+    """File Processors
+
+    This API provides document processing capabilities for extracting text
+    content from various file formats, including PDFs and Word documents.
+    """
+
+    @webmethod(route="/file-processors/process", method="POST", level=LLAMA_STACK_API_V1)
+    async def process_file(
+        self,
+        file_data: bytes,
+        filename: str,
+        options: dict[str, Any] | None = None,
+    ) -> ProcessedContent:
+        """Process a file and return extracted text content.
+
+        :param file_data: The raw file data as bytes
+        :param filename: Name of the file (used for format detection)
+        :param options: Optional processing options (processor-specific)
+        :returns: ProcessedContent with extracted text and metadata
+        """
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index eccc562ae..bcc0fa5bd 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -55,6 +55,7 @@ from llama_stack.core.store.registry import create_dist_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api
+from llama_stack.apis.file_processors import FileProcessors
 
 logger = get_logger(name=__name__, category="core")
 
@@ -82,6 +83,7 @@ class LlamaStack(
     Files,
     Prompts,
     Conversations,
+    FileProcessors,
 ):
     pass
 
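
For context, here is a minimal sketch of an inline provider that would satisfy the new `FileProcessors` protocol. It assumes the `file_processors` package's `__init__.py` re-exports `FileProcessors` and `ProcessedContent` (the `stack.py` import in this diff implies the former); the class name `BasicTextFileProcessor`, the UTF-8 decode fallback, and the metadata keys are illustrative assumptions, not part of this change:

```python
# Illustrative sketch only -- not part of this diff. Assumes the
# file_processors package re-exports FileProcessors and ProcessedContent.
import time
from typing import Any

from llama_stack.apis.file_processors import FileProcessors, ProcessedContent


class BasicTextFileProcessor(FileProcessors):
    """Naive processor that treats every input as UTF-8 text.

    A real provider would dispatch on the file extension (e.g. to a PDF
    or DOCX parser) instead of decoding the bytes directly.
    """

    async def process_file(
        self,
        file_data: bytes,
        filename: str,
        options: dict[str, Any] | None = None,
    ) -> ProcessedContent:
        start = time.perf_counter()
        # errors="replace" keeps the sketch total: undecodable bytes become U+FFFD
        text = file_data.decode("utf-8", errors="replace")
        return ProcessedContent(
            content=text,
            metadata={
                "processor": "basic-text",  # hypothetical processor id
                "filename": filename,
                "options": options or {},
                "duration_ms": (time.perf_counter() - start) * 1000,
            },
        )
```

A caller would then get extracted text plus provenance metadata back, e.g. `result = await processor.process_file(b"hello", "notes.txt")` yields `result.content == "hello"` with the processor id and timing in `result.metadata`.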