fix: address first round of reviews

Signed-off-by: Alina Ryan <aliryan@redhat.com>
This commit is contained in:
Alina Ryan 2025-11-25 19:44:59 -05:00
parent 402358c8e9
commit c2f0db9128
26 changed files with 44 additions and 145 deletions

View file

@ -3887,7 +3887,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/SupervisedFineTuneRequest' $ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true required: true
/v1alpha/file-processor/process: /v1alpha/file-processors/process:
post: post:
responses: responses:
'200': '200':
@ -3909,13 +3909,13 @@ paths:
description: Default Response description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- File Processor - File Processors
summary: Process File summary: Process File
description: |- description: |-
Process a file into structured content with optional chunking and embeddings. Process a file into structured content with optional chunking and embeddings.
This method processes raw file data and converts it into text content for applications such as vector store ingestion. This method processes raw file data and converts it into text content for applications such as vector store ingestion.
operationId: process_file_v1alpha_file_processor_process_post operationId: process_file_v1alpha_file_processors_process_post
requestBody: requestBody:
content: content:
application/json: application/json:
@ -13005,7 +13005,7 @@ components:
- benchmarks - benchmarks
- tool_groups - tool_groups
- files - files
- file_processor - file_processors
- prompts - prompts
- conversations - conversations
- inspect - inspect

View file

@ -0,0 +1,10 @@
---
sidebar_label: File Processors
title: File_Processors
---
# File_Processors
## Overview
This section contains documentation for all available providers for the **file_processors** API.

View file

@ -9813,7 +9813,7 @@ components:
- benchmarks - benchmarks
- tool_groups - tool_groups
- files - files
- file_processor - file_processors
- prompts - prompts
- conversations - conversations
- inspect - inspect

View file

@ -630,7 +630,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/SupervisedFineTuneRequest' $ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true required: true
/v1alpha/file-processor/process: /v1alpha/file-processors/process:
post: post:
responses: responses:
'200': '200':
@ -652,13 +652,13 @@ paths:
description: Default Response description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- File Processor - File Processors
summary: Process File summary: Process File
description: |- description: |-
Process a file into structured content with optional chunking and embeddings. Process a file into structured content with optional chunking and embeddings.
This method processes raw file data and converts it into text content for applications such as vector store ingestion. This method processes raw file data and converts it into text content for applications such as vector store ingestion.
operationId: process_file_v1alpha_file_processor_process_post operationId: process_file_v1alpha_file_processors_process_post
requestBody: requestBody:
content: content:
application/json: application/json:
@ -8777,7 +8777,7 @@ components:
- benchmarks - benchmarks
- tool_groups - tool_groups
- files - files
- file_processor - file_processors
- prompts - prompts
- conversations - conversations
- inspect - inspect

View file

@ -11640,7 +11640,7 @@ components:
- benchmarks - benchmarks
- tool_groups - tool_groups
- files - files
- file_processor - file_processors
- prompts - prompts
- conversations - conversations
- inspect - inspect

View file

@ -3887,7 +3887,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/SupervisedFineTuneRequest' $ref: '#/components/schemas/SupervisedFineTuneRequest'
required: true required: true
/v1alpha/file-processor/process: /v1alpha/file-processors/process:
post: post:
responses: responses:
'200': '200':
@ -3909,13 +3909,13 @@ paths:
description: Default Response description: Default Response
$ref: '#/components/responses/DefaultError' $ref: '#/components/responses/DefaultError'
tags: tags:
- File Processor - File Processors
summary: Process File summary: Process File
description: |- description: |-
Process a file into structured content with optional chunking and embeddings. Process a file into structured content with optional chunking and embeddings.
This method processes raw file data and converts it into text content for applications such as vector store ingestion. This method processes raw file data and converts it into text content for applications such as vector store ingestion.
operationId: process_file_v1alpha_file_processor_process_post operationId: process_file_v1alpha_file_processors_process_post
requestBody: requestBody:
content: content:
application/json: application/json:
@ -13005,7 +13005,7 @@ components:
- benchmarks - benchmarks
- tool_groups - tool_groups
- files - files
- file_processor - file_processors
- prompts - prompts
- conversations - conversations
- inspect - inspect

View file

@ -34,7 +34,7 @@ from llama_stack_api import (
DatasetsProtocolPrivate, DatasetsProtocolPrivate,
Eval, Eval,
ExternalApiSpec, ExternalApiSpec,
FileProcessor, FileProcessors,
Files, Files,
Inference, Inference,
InferenceProvider, InferenceProvider,
@ -101,7 +101,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
Api.files: Files, Api.files: Files,
Api.prompts: Prompts, Api.prompts: Prompts,
Api.conversations: Conversations, Api.conversations: Conversations,
Api.file_processor: FileProcessor, Api.file_processors: FileProcessors,
} }
if external_apis: if external_apis:

View file

@ -29,8 +29,6 @@ distribution_spec:
- provider_type: remote::weaviate - provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
file_processor:
- provider_type: inline::reference
safety: safety:
- provider_type: inline::llama-guard - provider_type: inline::llama-guard
- provider_type: inline::code-scanner - provider_type: inline::code-scanner

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -30,8 +30,6 @@ distribution_spec:
- provider_type: remote::weaviate - provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
file_processor:
- provider_type: inline::reference
safety: safety:
- provider_type: inline::llama-guard - provider_type: inline::llama-guard
- provider_type: inline::code-scanner - provider_type: inline::code-scanner

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -30,8 +30,6 @@ distribution_spec:
- provider_type: remote::weaviate - provider_type: remote::weaviate
files: files:
- provider_type: inline::localfs - provider_type: inline::localfs
file_processor:
- provider_type: inline::reference
safety: safety:
- provider_type: inline::llama-guard - provider_type: inline::llama-guard
- provider_type: inline::code-scanner - provider_type: inline::code-scanner

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -5,7 +5,6 @@ apis:
- batches - batches
- datasetio - datasetio
- eval - eval
- file_processor
- files - files
- inference - inference
- post_training - post_training
@ -154,9 +153,6 @@ providers:
metadata_store: metadata_store:
table_name: files_metadata table_name: files_metadata
backend: sql_default backend: sql_default
file_processor:
- provider_id: reference
provider_type: inline::reference
safety: safety:
- provider_id: llama-guard - provider_id: llama-guard
provider_type: inline::llama-guard provider_type: inline::llama-guard

View file

@ -123,7 +123,6 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
BuildProvider(provider_type="remote::weaviate"), BuildProvider(provider_type="remote::weaviate"),
], ],
"files": [BuildProvider(provider_type="inline::localfs")], "files": [BuildProvider(provider_type="inline::localfs")],
"file_processor": [BuildProvider(provider_type="inline::reference")],
"safety": [ "safety": [
BuildProvider(provider_type="inline::llama-guard"), BuildProvider(provider_type="inline::llama-guard"),
BuildProvider(provider_type="inline::code-scanner"), BuildProvider(provider_type="inline::code-scanner"),

View file

@ -45,7 +45,7 @@ CATEGORIES = [
"providers", "providers",
"models", "models",
"files", "files",
"file_processor", "file_processors",
"vector_io", "vector_io",
"tool_runtime", "tool_runtime",
"cli", "cli",

View file

@ -1,15 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .config import ReferenceFileProcessorImplConfig
async def get_provider_impl(config: ReferenceFileProcessorImplConfig, deps):
    """Build and initialize the reference file processor provider.

    Args:
        config: Configuration object handed to the implementation's constructor.
        deps: Dependencies forwarded unchanged to the implementation's constructor.

    Returns:
        The ``ReferenceFileProcessorImpl`` instance, after its ``initialize()``
        coroutine has been awaited.
    """
    # The implementation module is imported at call time rather than at module
    # import time, exactly as the original did.
    from .reference import ReferenceFileProcessorImpl

    processor = ReferenceFileProcessorImpl(config, deps)
    await processor.initialize()
    return processor

View file

@ -1,15 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pydantic import BaseModel
class ReferenceFileProcessorImplConfig(BaseModel):
    """Configuration for the reference file processor implementation."""

    @staticmethod
    def sample_run_config(**kwargs):
        """Return the sample run configuration for this provider.

        Any keyword arguments are accepted and ignored; the result is always
        a new empty dict.
        """
        sample: dict = {}
        return sample

View file

@ -1,41 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any
from llama_stack_api import FileProcessor, ProcessedContent, VectorStoreChunkingStrategy
from .config import ReferenceFileProcessorImplConfig
class ReferenceFileProcessorImpl(FileProcessor):
    """Reference implementation of the FileProcessor API.

    This is a stub: ``process_file`` returns fixed placeholder content and
    never inspects the bytes it is given.
    """

    def __init__(self, config: ReferenceFileProcessorImplConfig, deps: dict[str, Any]):
        # Both arguments are simply stored on the instance; no other setup happens here.
        self.deps = deps
        self.config = config

    async def initialize(self) -> None:
        """Do nothing; this implementation has no startup work."""
        return None

    async def process_file(
        self,
        file_data: bytes,
        filename: str,
        options: dict[str, Any] | None = None,
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
        include_embeddings: bool = False,
    ) -> ProcessedContent:
        """Process a file into structured content.

        Only ``filename`` is used, and it is echoed back in the result metadata.
        ``file_data``, ``options``, ``chunking_strategy`` and
        ``include_embeddings`` are accepted but not read.

        Returns:
            A ``ProcessedContent`` with placeholder text, no chunks, no
            embeddings, and metadata naming the processor and the file.
        """
        result_metadata = {
            "processor": "reference",
            "filename": filename,
        }
        return ProcessedContent(
            content="Placeholder content",
            chunks=None,
            embeddings=None,
            metadata=result_metadata,
        )

View file

@ -1,20 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
def available_providers() -> list[ProviderSpec]:
    """List the provider specs registered for the ``file_processor`` API.

    Returns:
        A one-element list holding the inline reference provider spec.
    """
    reference_spec = InlineProviderSpec(
        api=Api.file_processor,
        provider_type="inline::reference",
        pip_packages=[],
        module="llama_stack.providers.inline.file_processor.reference",
        config_class="llama_stack.providers.inline.file_processor.reference.config.ReferenceFileProcessorImplConfig",
        description="Reference file processor implementation (placeholder for development)",
    )
    return [reference_spec]

View file

@ -0,0 +1,11 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api import ProviderSpec
def available_providers() -> list[ProviderSpec]:
    """Return the provider specs for this API; there are currently none."""
    no_providers: list[ProviderSpec] = []
    return no_providers

View file

@ -112,7 +112,7 @@ from .datatypes import (
VectorStoresProtocolPrivate, VectorStoresProtocolPrivate,
) )
from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
from .file_processor import FileProcessor, ProcessedContent, ProcessFileRequest from .file_processors import FileProcessors, ProcessedContent, ProcessFileRequest
from .files import ( from .files import (
ExpiresAfter, ExpiresAfter,
Files, Files,
@ -519,7 +519,7 @@ __all__ = [
"ExpiresAfter", "ExpiresAfter",
"ExternalApiSpec", "ExternalApiSpec",
"ExtraBodyField", "ExtraBodyField",
"FileProcessor", "FileProcessors",
"Files", "Files",
"Fp8QuantizationConfig", "Fp8QuantizationConfig",
"clear_dynamic_schema_types", "clear_dynamic_schema_types",

View file

@ -110,7 +110,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
:cvar benchmarks: Benchmark suite management :cvar benchmarks: Benchmark suite management
:cvar tool_groups: Tool group organization :cvar tool_groups: Tool group organization
:cvar files: File storage and management :cvar files: File storage and management
:cvar file_processor: File parsing and processing operations :cvar file_processors: File parsing and processing operations
:cvar prompts: Prompt versions and management :cvar prompts: Prompt versions and management
:cvar inspect: Built-in system inspection and introspection :cvar inspect: Built-in system inspection and introspection
""" """
@ -135,7 +135,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
benchmarks = "benchmarks" benchmarks = "benchmarks"
tool_groups = "tool_groups" tool_groups = "tool_groups"
files = "files" files = "files"
file_processor = "file_processor" file_processors = "file_processors"
prompts = "prompts" prompts = "prompts"
conversations = "conversations" conversations = "conversations"

View file

@ -53,7 +53,7 @@ class ProcessedContent(BaseModel):
@telemetry_traceable @telemetry_traceable
@runtime_checkable @runtime_checkable
class FileProcessor(Protocol): class FileProcessors(Protocol):
""" """
File Processor API for converting files into structured, processable content. File Processor API for converting files into structured, processable content.
@ -72,7 +72,7 @@ class FileProcessor(Protocol):
processing capabilities, and optimization strategies. processing capabilities, and optimization strategies.
""" """
@webmethod(route="/file-processor/process", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/file-processors/process", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def process_file( async def process_file(
self, self,
file_data: bytes, file_data: bytes,