mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
Merge 3f51e16601 into 4237eb4aaa
This commit is contained in:
commit
0cd2329dbc
15 changed files with 603 additions and 0 deletions
|
|
@ -3893,6 +3893,41 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
/v1alpha/file-processors/process:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessedContent'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- File Processors
|
||||
summary: Process File
|
||||
description: |-
|
||||
Process a file into structured content with optional chunking and embeddings.
|
||||
|
||||
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||
operationId: process_file_v1alpha_file_processors_process_post
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessFileRequest'
|
||||
required: true
|
||||
components:
|
||||
schemas:
|
||||
Error:
|
||||
|
|
@ -12100,6 +12135,73 @@ components:
|
|||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
ProcessFileRequest:
|
||||
properties:
|
||||
file_data:
|
||||
type: string
|
||||
format: binary
|
||||
title: File Data
|
||||
filename:
|
||||
type: string
|
||||
title: Filename
|
||||
options:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
chunking_strategy:
|
||||
anyOf:
|
||||
- oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
title: VectorStoreChunkingStrategyAuto
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyStatic
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||
- type: 'null'
|
||||
title: Chunking Strategy
|
||||
include_embeddings:
|
||||
type: boolean
|
||||
title: Include Embeddings
|
||||
default: false
|
||||
type: object
|
||||
required:
|
||||
- file_data
|
||||
- filename
|
||||
title: ProcessFileRequest
|
||||
ProcessedContent:
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
title: Content
|
||||
chunks:
|
||||
anyOf:
|
||||
- items:
|
||||
$ref: '#/components/schemas/Chunk-Output'
|
||||
type: array
|
||||
- type: 'null'
|
||||
embeddings:
|
||||
anyOf:
|
||||
- items:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
type: array
|
||||
- type: 'null'
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
type: object
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: ProcessedContent
|
||||
description: Result of file processing operation.
|
||||
SearchRankingOptions:
|
||||
properties:
|
||||
ranker:
|
||||
|
|
@ -12706,6 +12808,7 @@ components:
|
|||
- benchmarks
|
||||
- tool_groups
|
||||
- files
|
||||
- file_processors
|
||||
- prompts
|
||||
- conversations
|
||||
- inspect
|
||||
|
|
|
|||
10
docs/docs/providers/file_processor/index.mdx
Normal file
10
docs/docs/providers/file_processor/index.mdx
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
sidebar_label: File Processor
|
||||
title: File_Processor
|
||||
---
|
||||
|
||||
# File_Processor
|
||||
|
||||
## Overview
|
||||
|
||||
This section contains documentation for all available providers for the **file_processor** API.
|
||||
17
docs/docs/providers/file_processor/inline_reference.mdx
Normal file
17
docs/docs/providers/file_processor/inline_reference.mdx
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
description: "Reference file processor implementation (placeholder for development)"
|
||||
sidebar_label: Reference
|
||||
title: inline::reference
|
||||
---
|
||||
|
||||
# inline::reference
|
||||
|
||||
## Description
|
||||
|
||||
Reference file processor implementation (placeholder for development)
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
{}
|
||||
```
|
||||
10
docs/docs/providers/file_processors/index.mdx
Normal file
10
docs/docs/providers/file_processors/index.mdx
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
sidebar_label: File Processors
|
||||
title: File_Processors
|
||||
---
|
||||
|
||||
# File_Processors
|
||||
|
||||
## Overview
|
||||
|
||||
This section contains documentation for all available providers for the **file_processors** API.
|
||||
68
docs/static/deprecated-llama-stack-spec.yaml
vendored
68
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -9094,6 +9094,73 @@ components:
|
|||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
ProcessFileRequest:
|
||||
properties:
|
||||
file_data:
|
||||
type: string
|
||||
format: binary
|
||||
title: File Data
|
||||
filename:
|
||||
type: string
|
||||
title: Filename
|
||||
options:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
chunking_strategy:
|
||||
anyOf:
|
||||
- oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
title: VectorStoreChunkingStrategyAuto
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyStatic
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||
- type: 'null'
|
||||
title: Chunking Strategy
|
||||
include_embeddings:
|
||||
type: boolean
|
||||
title: Include Embeddings
|
||||
default: false
|
||||
type: object
|
||||
required:
|
||||
- file_data
|
||||
- filename
|
||||
title: ProcessFileRequest
|
||||
ProcessedContent:
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
title: Content
|
||||
chunks:
|
||||
anyOf:
|
||||
- items:
|
||||
$ref: '#/components/schemas/Chunk-Output'
|
||||
type: array
|
||||
- type: 'null'
|
||||
embeddings:
|
||||
anyOf:
|
||||
- items:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
type: array
|
||||
- type: 'null'
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
type: object
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: ProcessedContent
|
||||
description: Result of file processing operation.
|
||||
SearchRankingOptions:
|
||||
properties:
|
||||
ranker:
|
||||
|
|
@ -9700,6 +9767,7 @@ components:
|
|||
- benchmarks
|
||||
- tool_groups
|
||||
- files
|
||||
- file_processors
|
||||
- prompts
|
||||
- conversations
|
||||
- inspect
|
||||
|
|
|
|||
103
docs/static/experimental-llama-stack-spec.yaml
vendored
103
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -630,6 +630,41 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
/v1alpha/file-processors/process:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessedContent'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- File Processors
|
||||
summary: Process File
|
||||
description: |-
|
||||
Process a file into structured content with optional chunking and embeddings.
|
||||
|
||||
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||
operationId: process_file_v1alpha_file_processors_process_post
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessFileRequest'
|
||||
required: true
|
||||
components:
|
||||
schemas:
|
||||
Error:
|
||||
|
|
@ -7860,6 +7895,73 @@ components:
|
|||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
ProcessFileRequest:
|
||||
properties:
|
||||
file_data:
|
||||
type: string
|
||||
format: binary
|
||||
title: File Data
|
||||
filename:
|
||||
type: string
|
||||
title: Filename
|
||||
options:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
chunking_strategy:
|
||||
anyOf:
|
||||
- oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
title: VectorStoreChunkingStrategyAuto
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyStatic
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||
- type: 'null'
|
||||
title: Chunking Strategy
|
||||
include_embeddings:
|
||||
type: boolean
|
||||
title: Include Embeddings
|
||||
default: false
|
||||
type: object
|
||||
required:
|
||||
- file_data
|
||||
- filename
|
||||
title: ProcessFileRequest
|
||||
ProcessedContent:
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
title: Content
|
||||
chunks:
|
||||
anyOf:
|
||||
- items:
|
||||
$ref: '#/components/schemas/Chunk-Output'
|
||||
type: array
|
||||
- type: 'null'
|
||||
embeddings:
|
||||
anyOf:
|
||||
- items:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
type: array
|
||||
- type: 'null'
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
type: object
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: ProcessedContent
|
||||
description: Result of file processing operation.
|
||||
SearchRankingOptions:
|
||||
properties:
|
||||
ranker:
|
||||
|
|
@ -8466,6 +8568,7 @@ components:
|
|||
- benchmarks
|
||||
- tool_groups
|
||||
- files
|
||||
- file_processors
|
||||
- prompts
|
||||
- conversations
|
||||
- inspect
|
||||
|
|
|
|||
68
docs/static/llama-stack-spec.yaml
vendored
68
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -10536,6 +10536,73 @@ components:
|
|||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
ProcessFileRequest:
|
||||
properties:
|
||||
file_data:
|
||||
type: string
|
||||
format: binary
|
||||
title: File Data
|
||||
filename:
|
||||
type: string
|
||||
title: Filename
|
||||
options:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
chunking_strategy:
|
||||
anyOf:
|
||||
- oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
title: VectorStoreChunkingStrategyAuto
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyStatic
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||
- type: 'null'
|
||||
title: Chunking Strategy
|
||||
include_embeddings:
|
||||
type: boolean
|
||||
title: Include Embeddings
|
||||
default: false
|
||||
type: object
|
||||
required:
|
||||
- file_data
|
||||
- filename
|
||||
title: ProcessFileRequest
|
||||
ProcessedContent:
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
title: Content
|
||||
chunks:
|
||||
anyOf:
|
||||
- items:
|
||||
$ref: '#/components/schemas/Chunk-Output'
|
||||
type: array
|
||||
- type: 'null'
|
||||
embeddings:
|
||||
anyOf:
|
||||
- items:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
type: array
|
||||
- type: 'null'
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
type: object
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: ProcessedContent
|
||||
description: Result of file processing operation.
|
||||
SearchRankingOptions:
|
||||
properties:
|
||||
ranker:
|
||||
|
|
@ -11142,6 +11209,7 @@ components:
|
|||
- benchmarks
|
||||
- tool_groups
|
||||
- files
|
||||
- file_processors
|
||||
- prompts
|
||||
- conversations
|
||||
- inspect
|
||||
|
|
|
|||
103
docs/static/stainless-llama-stack-spec.yaml
vendored
103
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -3893,6 +3893,41 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||
required: true
|
||||
/v1alpha/file-processors/process:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessedContent'
|
||||
'400':
|
||||
description: Bad Request
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
description: Too Many Requests
|
||||
$ref: '#/components/responses/TooManyRequests429'
|
||||
'500':
|
||||
description: Internal Server Error
|
||||
$ref: '#/components/responses/InternalServerError500'
|
||||
default:
|
||||
description: Default Response
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- File Processors
|
||||
summary: Process File
|
||||
description: |-
|
||||
Process a file into structured content with optional chunking and embeddings.
|
||||
|
||||
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||
operationId: process_file_v1alpha_file_processors_process_post
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ProcessFileRequest'
|
||||
required: true
|
||||
components:
|
||||
schemas:
|
||||
Error:
|
||||
|
|
@ -12100,6 +12135,73 @@ components:
|
|||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
ProcessFileRequest:
|
||||
properties:
|
||||
file_data:
|
||||
type: string
|
||||
format: binary
|
||||
title: File Data
|
||||
filename:
|
||||
type: string
|
||||
title: Filename
|
||||
options:
|
||||
anyOf:
|
||||
- additionalProperties: true
|
||||
type: object
|
||||
- type: 'null'
|
||||
chunking_strategy:
|
||||
anyOf:
|
||||
- oneOf:
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
title: VectorStoreChunkingStrategyAuto
|
||||
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyStatic
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||
- type: 'null'
|
||||
title: Chunking Strategy
|
||||
include_embeddings:
|
||||
type: boolean
|
||||
title: Include Embeddings
|
||||
default: false
|
||||
type: object
|
||||
required:
|
||||
- file_data
|
||||
- filename
|
||||
title: ProcessFileRequest
|
||||
ProcessedContent:
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
title: Content
|
||||
chunks:
|
||||
anyOf:
|
||||
- items:
|
||||
$ref: '#/components/schemas/Chunk-Output'
|
||||
type: array
|
||||
- type: 'null'
|
||||
embeddings:
|
||||
anyOf:
|
||||
- items:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
type: array
|
||||
- type: 'null'
|
||||
metadata:
|
||||
additionalProperties: true
|
||||
type: object
|
||||
title: Metadata
|
||||
type: object
|
||||
required:
|
||||
- content
|
||||
- metadata
|
||||
title: ProcessedContent
|
||||
description: Result of file processing operation.
|
||||
SearchRankingOptions:
|
||||
properties:
|
||||
ranker:
|
||||
|
|
@ -12706,6 +12808,7 @@ components:
|
|||
- benchmarks
|
||||
- tool_groups
|
||||
- files
|
||||
- file_processors
|
||||
- prompts
|
||||
- conversations
|
||||
- inspect
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ from llama_stack_api import (
|
|||
DatasetsProtocolPrivate,
|
||||
Eval,
|
||||
ExternalApiSpec,
|
||||
FileProcessors,
|
||||
Files,
|
||||
Inference,
|
||||
InferenceProvider,
|
||||
|
|
@ -100,6 +101,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
|||
Api.files: Files,
|
||||
Api.prompts: Prompts,
|
||||
Api.conversations: Conversations,
|
||||
Api.file_processors: FileProcessors,
|
||||
}
|
||||
|
||||
if external_apis:
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ CATEGORIES = [
|
|||
"providers",
|
||||
"models",
|
||||
"files",
|
||||
"file_processors",
|
||||
"vector_io",
|
||||
"tool_runtime",
|
||||
"cli",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
11
src/llama_stack/providers/registry/file_processors.py
Normal file
11
src/llama_stack/providers/registry/file_processors.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack_api import ProviderSpec
|
||||
|
||||
|
||||
def available_providers() -> list[ProviderSpec]:
    """Return the provider specs registered for the file_processors API.

    No providers are registered here yet, so the result is always an
    empty list.
    """
    registered: list[ProviderSpec] = []
    return registered
|
||||
|
|
@ -112,6 +112,7 @@ from .datatypes import (
|
|||
VectorStoresProtocolPrivate,
|
||||
)
|
||||
from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
|
||||
from .file_processors import FileProcessors, ProcessedContent, ProcessFileRequest
|
||||
from .files import (
|
||||
ExpiresAfter,
|
||||
Files,
|
||||
|
|
@ -518,6 +519,7 @@ __all__ = [
|
|||
"ExpiresAfter",
|
||||
"ExternalApiSpec",
|
||||
"ExtraBodyField",
|
||||
"FileProcessors",
|
||||
"Files",
|
||||
"Fp8QuantizationConfig",
|
||||
"clear_dynamic_schema_types",
|
||||
|
|
@ -725,6 +727,8 @@ __all__ = [
|
|||
"ParamType",
|
||||
"parse_type",
|
||||
"PostTraining",
|
||||
"ProcessedContent",
|
||||
"ProcessFileRequest",
|
||||
"PostTrainingMetric",
|
||||
"PostTrainingJob",
|
||||
"PostTrainingJobArtifactsResponse",
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
|
|||
:cvar benchmarks: Benchmark suite management
|
||||
:cvar tool_groups: Tool group organization
|
||||
:cvar files: File storage and management
|
||||
:cvar file_processors: File parsing and processing operations
|
||||
:cvar prompts: Prompt versions and management
|
||||
:cvar inspect: Built-in system inspection and introspection
|
||||
"""
|
||||
|
|
@ -134,6 +135,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
|
|||
benchmarks = "benchmarks"
|
||||
tool_groups = "tool_groups"
|
||||
files = "files"
|
||||
file_processors = "file_processors"
|
||||
prompts = "prompts"
|
||||
conversations = "conversations"
|
||||
|
||||
|
|
|
|||
96
src/llama_stack_api/file_processors.py
Normal file
96
src/llama_stack_api/file_processors.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .common.tracing import telemetry_traceable
|
||||
from .schema_utils import json_schema_type, webmethod
|
||||
from .vector_io import Chunk, VectorStoreChunkingStrategy
|
||||
from .version import LLAMA_STACK_API_V1ALPHA
|
||||
|
||||
|
||||
# Request payload model for file processing. Its fields match the parameters
# of FileProcessors.process_file (same names, types and defaults).
# `file_data` and `filename` have no defaults and are therefore required; the
# remaining fields are optional.
@json_schema_type
class ProcessFileRequest(BaseModel):
    """Request for processing a file into structured content."""

    # NOTE(review): the generated OpenAPI specs in this change describe this
    # field as `type: string` / `format: binary` inside an `application/json`
    # request body. Raw bytes cannot be carried verbatim in JSON -- confirm
    # whether base64 text or a multipart/form-data upload is intended.
    file_data: bytes
    """Raw file data to process."""

    filename: str
    """Original filename for format detection and processing hints."""

    # Free-form mapping; keys and values are not validated by this model.
    options: dict[str, Any] | None = None
    """Optional processing options. Provider-specific parameters."""

    # Reuses the chunking strategy type imported from .vector_io; None means no
    # strategy was supplied.
    chunking_strategy: VectorStoreChunkingStrategy | None = None
    """Optional chunking strategy for splitting content into chunks."""

    # Defaults to False, so embeddings are opt-in.
    include_embeddings: bool = False
    """Whether to generate embeddings for chunks."""
|
||||
|
||||
|
||||
# Response model returned by FileProcessors.process_file.
# `content` and `metadata` have no defaults and are therefore required;
# `chunks` and `embeddings` default to None.
# NOTE(review): the class docstring text appears verbatim as the schema
# `description` in the generated OpenAPI specs, so rewording it changes the
# published spec.
@json_schema_type
class ProcessedContent(BaseModel):
    """Result of file processing operation."""

    content: str
    """Extracted text content from the file."""

    chunks: list[Chunk] | None = None
    """Optional chunks if chunking strategy was provided."""

    # Presumably one vector per entry in `chunks`, in the same order -- TODO
    # confirm; nothing in this model enforces matching lengths.
    embeddings: list[list[float]] | None = None
    """Optional embeddings for chunks if requested."""

    # Free-form mapping; keys and values are not validated by this model.
    metadata: dict[str, Any]
    """Processing metadata including processor name, timing, and provider-specific data."""
|
||||
|
||||
|
||||
# Structural interface (typing.Protocol) for file-processor providers.
# `runtime_checkable` permits isinstance() checks against this Protocol; such
# checks only verify that the method exists, not its signature.
# `telemetry_traceable` comes from .common.tracing; presumably it instruments
# calls for tracing -- confirm there.
@telemetry_traceable
@runtime_checkable
class FileProcessors(Protocol):
    """
    File Processor API for converting files into structured, processable content.

    This API provides a flexible interface for processing various file formats
    (PDFs, documents, images, etc.) into text content that can be used for
    vector store ingestion, RAG applications, or standalone content extraction.

    The API supports:
    - Multiple file formats through extensible provider architecture
    - Configurable processing options per provider
    - Integration with vector store chunking strategies
    - Optional embedding generation for chunks
    - Rich metadata about processing results

    Future providers can extend this interface to support additional formats,
    processing capabilities, and optimization strategies.
    """

    # Declared as POST /file-processors/process at the v1alpha API level; the
    # generated specs in this change list it as /v1alpha/file-processors/process.
    # NOTE(review): the docstring below is mirrored into the generated OpenAPI
    # specs (operation description, and the `:returns:` text as the 200
    # response description), so rewording it changes the published spec.
    # NOTE(review): the parameters match the fields of ProcessFileRequest; the
    # specs publish `file_data` as `format: binary` inside an application/json
    # body -- confirm how raw bytes are expected to be transported.
    @webmethod(route="/file-processors/process", method="POST", level=LLAMA_STACK_API_V1ALPHA)
    async def process_file(
        self,
        file_data: bytes,
        filename: str,
        options: dict[str, Any] | None = None,
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
        include_embeddings: bool = False,
    ) -> ProcessedContent:
        """
        Process a file into structured content with optional chunking and embeddings.

        This method processes raw file data and converts it into text content for applications such as vector store ingestion.

        :param file_data: Raw bytes of the file to process.
        :param filename: Original filename for format detection.
        :param options: Provider-specific processing options (e.g., OCR settings, output format).
        :param chunking_strategy: Optional strategy for splitting content into chunks.
        :param include_embeddings: Whether to generate embeddings for chunks.
        :returns: ProcessedContent with extracted text, optional chunks, and metadata.
        """
        # Protocol stub: no behavior is defined here.
        ...
|
||||
Loading…
Add table
Add a link
Reference in a new issue