mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 01:48:05 +00:00
Merge 3f51e16601 into 4237eb4aaa
This commit is contained in:
commit
0cd2329dbc
15 changed files with 603 additions and 0 deletions
|
|
@ -3893,6 +3893,41 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1alpha/file-processors/process:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessedContent'
|
||||||
|
'400':
|
||||||
|
description: Bad Request
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
description: Too Many Requests
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
'500':
|
||||||
|
description: Internal Server Error
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
default:
|
||||||
|
description: Default Response
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- File Processors
|
||||||
|
summary: Process File
|
||||||
|
description: |-
|
||||||
|
Process a file into structured content with optional chunking and embeddings.
|
||||||
|
|
||||||
|
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||||
|
operationId: process_file_v1alpha_file_processors_process_post
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessFileRequest'
|
||||||
|
required: true
|
||||||
components:
|
components:
|
||||||
schemas:
|
schemas:
|
||||||
Error:
|
Error:
|
||||||
|
|
@ -12100,6 +12135,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- reasoning_tokens
|
- reasoning_tokens
|
||||||
title: OutputTokensDetails
|
title: OutputTokensDetails
|
||||||
|
ProcessFileRequest:
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: File Data
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
title: Filename
|
||||||
|
options:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties: true
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
|
chunking_strategy:
|
||||||
|
anyOf:
|
||||||
|
- oneOf:
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
title: VectorStoreChunkingStrategyAuto
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyStatic
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||||
|
- type: 'null'
|
||||||
|
title: Chunking Strategy
|
||||||
|
include_embeddings:
|
||||||
|
type: boolean
|
||||||
|
title: Include Embeddings
|
||||||
|
default: false
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- file_data
|
||||||
|
- filename
|
||||||
|
title: ProcessFileRequest
|
||||||
|
ProcessedContent:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
title: Content
|
||||||
|
chunks:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
$ref: '#/components/schemas/Chunk-Output'
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
embeddings:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
title: Metadata
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- metadata
|
||||||
|
title: ProcessedContent
|
||||||
|
description: Result of file processing operation.
|
||||||
SearchRankingOptions:
|
SearchRankingOptions:
|
||||||
properties:
|
properties:
|
||||||
ranker:
|
ranker:
|
||||||
|
|
@ -12706,6 +12808,7 @@ components:
|
||||||
- benchmarks
|
- benchmarks
|
||||||
- tool_groups
|
- tool_groups
|
||||||
- files
|
- files
|
||||||
|
- file_processors
|
||||||
- prompts
|
- prompts
|
||||||
- conversations
|
- conversations
|
||||||
- inspect
|
- inspect
|
||||||
|
|
|
||||||
10
docs/docs/providers/file_processor/index.mdx
Normal file
10
docs/docs/providers/file_processor/index.mdx
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
---
|
||||||
|
sidebar_label: File Processor
|
||||||
|
title: File_Processor
|
||||||
|
---
|
||||||
|
|
||||||
|
# File_Processor
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **file_processors** API.
|
||||||
17
docs/docs/providers/file_processor/inline_reference.mdx
Normal file
17
docs/docs/providers/file_processor/inline_reference.mdx
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
---
|
||||||
|
description: "Reference file processor implementation (placeholder for development)"
|
||||||
|
sidebar_label: Reference
|
||||||
|
title: inline::reference
|
||||||
|
---
|
||||||
|
|
||||||
|
# inline::reference
|
||||||
|
|
||||||
|
## Description
|
||||||
|
|
||||||
|
Reference file processor implementation (placeholder for development)
|
||||||
|
|
||||||
|
## Sample Configuration
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
{}
|
||||||
|
```
|
||||||
10
docs/docs/providers/file_processors/index.mdx
Normal file
10
docs/docs/providers/file_processors/index.mdx
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
---
|
||||||
|
sidebar_label: File Processors
|
||||||
|
title: File_Processors
|
||||||
|
---
|
||||||
|
|
||||||
|
# File_Processors
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This section contains documentation for all available providers for the **file_processors** API.
|
||||||
68
docs/static/deprecated-llama-stack-spec.yaml
vendored
68
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
|
@ -9094,6 +9094,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- reasoning_tokens
|
- reasoning_tokens
|
||||||
title: OutputTokensDetails
|
title: OutputTokensDetails
|
||||||
|
ProcessFileRequest:
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: File Data
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
title: Filename
|
||||||
|
options:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties: true
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
|
chunking_strategy:
|
||||||
|
anyOf:
|
||||||
|
- oneOf:
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
title: VectorStoreChunkingStrategyAuto
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyStatic
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||||
|
- type: 'null'
|
||||||
|
title: Chunking Strategy
|
||||||
|
include_embeddings:
|
||||||
|
type: boolean
|
||||||
|
title: Include Embeddings
|
||||||
|
default: false
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- file_data
|
||||||
|
- filename
|
||||||
|
title: ProcessFileRequest
|
||||||
|
ProcessedContent:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
title: Content
|
||||||
|
chunks:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
$ref: '#/components/schemas/Chunk-Output'
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
embeddings:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
title: Metadata
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- metadata
|
||||||
|
title: ProcessedContent
|
||||||
|
description: Result of file processing operation.
|
||||||
SearchRankingOptions:
|
SearchRankingOptions:
|
||||||
properties:
|
properties:
|
||||||
ranker:
|
ranker:
|
||||||
|
|
@ -9700,6 +9767,7 @@ components:
|
||||||
- benchmarks
|
- benchmarks
|
||||||
- tool_groups
|
- tool_groups
|
||||||
- files
|
- files
|
||||||
|
- file_processors
|
||||||
- prompts
|
- prompts
|
||||||
- conversations
|
- conversations
|
||||||
- inspect
|
- inspect
|
||||||
|
|
|
||||||
103
docs/static/experimental-llama-stack-spec.yaml
vendored
103
docs/static/experimental-llama-stack-spec.yaml
vendored
|
|
@ -630,6 +630,41 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1alpha/file-processors/process:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessedContent'
|
||||||
|
'400':
|
||||||
|
description: Bad Request
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
description: Too Many Requests
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
'500':
|
||||||
|
description: Internal Server Error
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
default:
|
||||||
|
description: Default Response
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- File Processors
|
||||||
|
summary: Process File
|
||||||
|
description: |-
|
||||||
|
Process a file into structured content with optional chunking and embeddings.
|
||||||
|
|
||||||
|
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||||
|
operationId: process_file_v1alpha_file_processors_process_post
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessFileRequest'
|
||||||
|
required: true
|
||||||
components:
|
components:
|
||||||
schemas:
|
schemas:
|
||||||
Error:
|
Error:
|
||||||
|
|
@ -7860,6 +7895,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- reasoning_tokens
|
- reasoning_tokens
|
||||||
title: OutputTokensDetails
|
title: OutputTokensDetails
|
||||||
|
ProcessFileRequest:
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: File Data
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
title: Filename
|
||||||
|
options:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties: true
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
|
chunking_strategy:
|
||||||
|
anyOf:
|
||||||
|
- oneOf:
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
title: VectorStoreChunkingStrategyAuto
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyStatic
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||||
|
- type: 'null'
|
||||||
|
title: Chunking Strategy
|
||||||
|
include_embeddings:
|
||||||
|
type: boolean
|
||||||
|
title: Include Embeddings
|
||||||
|
default: false
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- file_data
|
||||||
|
- filename
|
||||||
|
title: ProcessFileRequest
|
||||||
|
ProcessedContent:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
title: Content
|
||||||
|
chunks:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
$ref: '#/components/schemas/Chunk-Output'
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
embeddings:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
title: Metadata
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- metadata
|
||||||
|
title: ProcessedContent
|
||||||
|
description: Result of file processing operation.
|
||||||
SearchRankingOptions:
|
SearchRankingOptions:
|
||||||
properties:
|
properties:
|
||||||
ranker:
|
ranker:
|
||||||
|
|
@ -8466,6 +8568,7 @@ components:
|
||||||
- benchmarks
|
- benchmarks
|
||||||
- tool_groups
|
- tool_groups
|
||||||
- files
|
- files
|
||||||
|
- file_processors
|
||||||
- prompts
|
- prompts
|
||||||
- conversations
|
- conversations
|
||||||
- inspect
|
- inspect
|
||||||
|
|
|
||||||
68
docs/static/llama-stack-spec.yaml
vendored
68
docs/static/llama-stack-spec.yaml
vendored
|
|
@ -10536,6 +10536,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- reasoning_tokens
|
- reasoning_tokens
|
||||||
title: OutputTokensDetails
|
title: OutputTokensDetails
|
||||||
|
ProcessFileRequest:
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: File Data
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
title: Filename
|
||||||
|
options:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties: true
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
|
chunking_strategy:
|
||||||
|
anyOf:
|
||||||
|
- oneOf:
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
title: VectorStoreChunkingStrategyAuto
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyStatic
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||||
|
- type: 'null'
|
||||||
|
title: Chunking Strategy
|
||||||
|
include_embeddings:
|
||||||
|
type: boolean
|
||||||
|
title: Include Embeddings
|
||||||
|
default: false
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- file_data
|
||||||
|
- filename
|
||||||
|
title: ProcessFileRequest
|
||||||
|
ProcessedContent:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
title: Content
|
||||||
|
chunks:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
$ref: '#/components/schemas/Chunk-Output'
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
embeddings:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
title: Metadata
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- metadata
|
||||||
|
title: ProcessedContent
|
||||||
|
description: Result of file processing operation.
|
||||||
SearchRankingOptions:
|
SearchRankingOptions:
|
||||||
properties:
|
properties:
|
||||||
ranker:
|
ranker:
|
||||||
|
|
@ -11142,6 +11209,7 @@ components:
|
||||||
- benchmarks
|
- benchmarks
|
||||||
- tool_groups
|
- tool_groups
|
||||||
- files
|
- files
|
||||||
|
- file_processors
|
||||||
- prompts
|
- prompts
|
||||||
- conversations
|
- conversations
|
||||||
- inspect
|
- inspect
|
||||||
|
|
|
||||||
103
docs/static/stainless-llama-stack-spec.yaml
vendored
103
docs/static/stainless-llama-stack-spec.yaml
vendored
|
|
@ -3893,6 +3893,41 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
$ref: '#/components/schemas/SupervisedFineTuneRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1alpha/file-processors/process:
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: ProcessedContent with extracted text, optional chunks, and metadata.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessedContent'
|
||||||
|
'400':
|
||||||
|
description: Bad Request
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
description: Too Many Requests
|
||||||
|
$ref: '#/components/responses/TooManyRequests429'
|
||||||
|
'500':
|
||||||
|
description: Internal Server Error
|
||||||
|
$ref: '#/components/responses/InternalServerError500'
|
||||||
|
default:
|
||||||
|
description: Default Response
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- File Processors
|
||||||
|
summary: Process File
|
||||||
|
description: |-
|
||||||
|
Process a file into structured content with optional chunking and embeddings.
|
||||||
|
|
||||||
|
This method processes raw file data and converts it into text content for applications such as vector store ingestion.
|
||||||
|
operationId: process_file_v1alpha_file_processors_process_post
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ProcessFileRequest'
|
||||||
|
required: true
|
||||||
components:
|
components:
|
||||||
schemas:
|
schemas:
|
||||||
Error:
|
Error:
|
||||||
|
|
@ -12100,6 +12135,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- reasoning_tokens
|
- reasoning_tokens
|
||||||
title: OutputTokensDetails
|
title: OutputTokensDetails
|
||||||
|
ProcessFileRequest:
|
||||||
|
properties:
|
||||||
|
file_data:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
title: File Data
|
||||||
|
filename:
|
||||||
|
type: string
|
||||||
|
title: Filename
|
||||||
|
options:
|
||||||
|
anyOf:
|
||||||
|
- additionalProperties: true
|
||||||
|
type: object
|
||||||
|
- type: 'null'
|
||||||
|
chunking_strategy:
|
||||||
|
anyOf:
|
||||||
|
- oneOf:
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
title: VectorStoreChunkingStrategyAuto
|
||||||
|
- $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyStatic
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
|
||||||
|
static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
|
||||||
|
title: VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic
|
||||||
|
- type: 'null'
|
||||||
|
title: Chunking Strategy
|
||||||
|
include_embeddings:
|
||||||
|
type: boolean
|
||||||
|
title: Include Embeddings
|
||||||
|
default: false
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- file_data
|
||||||
|
- filename
|
||||||
|
title: ProcessFileRequest
|
||||||
|
ProcessedContent:
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: string
|
||||||
|
title: Content
|
||||||
|
chunks:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
$ref: '#/components/schemas/Chunk-Output'
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
embeddings:
|
||||||
|
anyOf:
|
||||||
|
- items:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
|
type: array
|
||||||
|
- type: 'null'
|
||||||
|
metadata:
|
||||||
|
additionalProperties: true
|
||||||
|
type: object
|
||||||
|
title: Metadata
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- content
|
||||||
|
- metadata
|
||||||
|
title: ProcessedContent
|
||||||
|
description: Result of file processing operation.
|
||||||
SearchRankingOptions:
|
SearchRankingOptions:
|
||||||
properties:
|
properties:
|
||||||
ranker:
|
ranker:
|
||||||
|
|
@ -12706,6 +12808,7 @@ components:
|
||||||
- benchmarks
|
- benchmarks
|
||||||
- tool_groups
|
- tool_groups
|
||||||
- files
|
- files
|
||||||
|
- file_processors
|
||||||
- prompts
|
- prompts
|
||||||
- conversations
|
- conversations
|
||||||
- inspect
|
- inspect
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ from llama_stack_api import (
|
||||||
DatasetsProtocolPrivate,
|
DatasetsProtocolPrivate,
|
||||||
Eval,
|
Eval,
|
||||||
ExternalApiSpec,
|
ExternalApiSpec,
|
||||||
|
FileProcessors,
|
||||||
Files,
|
Files,
|
||||||
Inference,
|
Inference,
|
||||||
InferenceProvider,
|
InferenceProvider,
|
||||||
|
|
@ -100,6 +101,7 @@ def api_protocol_map(external_apis: dict[Api, ExternalApiSpec] | None = None) ->
|
||||||
Api.files: Files,
|
Api.files: Files,
|
||||||
Api.prompts: Prompts,
|
Api.prompts: Prompts,
|
||||||
Api.conversations: Conversations,
|
Api.conversations: Conversations,
|
||||||
|
Api.file_processors: FileProcessors,
|
||||||
}
|
}
|
||||||
|
|
||||||
if external_apis:
|
if external_apis:
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ CATEGORIES = [
|
||||||
"providers",
|
"providers",
|
||||||
"models",
|
"models",
|
||||||
"files",
|
"files",
|
||||||
|
"file_processors",
|
||||||
"vector_io",
|
"vector_io",
|
||||||
"tool_runtime",
|
"tool_runtime",
|
||||||
"cli",
|
"cli",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
11
src/llama_stack/providers/registry/file_processors.py
Normal file
11
src/llama_stack/providers/registry/file_processors.py
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from llama_stack_api import ProviderSpec
|
||||||
|
|
||||||
|
|
||||||
|
def available_providers() -> list[ProviderSpec]:
    """Return the provider specs registered for the file_processors API.

    No provider is registered in this module yet, so the result is always
    an empty list. A new list object is created on every call, so callers
    may mutate the result without affecting later calls.

    :returns: An empty list of provider specs.
    """
    return []
|
||||||
|
|
@ -112,6 +112,7 @@ from .datatypes import (
|
||||||
VectorStoresProtocolPrivate,
|
VectorStoresProtocolPrivate,
|
||||||
)
|
)
|
||||||
from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
|
from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate
|
||||||
|
from .file_processors import FileProcessors, ProcessedContent, ProcessFileRequest
|
||||||
from .files import (
|
from .files import (
|
||||||
ExpiresAfter,
|
ExpiresAfter,
|
||||||
Files,
|
Files,
|
||||||
|
|
@ -518,6 +519,7 @@ __all__ = [
|
||||||
"ExpiresAfter",
|
"ExpiresAfter",
|
||||||
"ExternalApiSpec",
|
"ExternalApiSpec",
|
||||||
"ExtraBodyField",
|
"ExtraBodyField",
|
||||||
|
"FileProcessors",
|
||||||
"Files",
|
"Files",
|
||||||
"Fp8QuantizationConfig",
|
"Fp8QuantizationConfig",
|
||||||
"clear_dynamic_schema_types",
|
"clear_dynamic_schema_types",
|
||||||
|
|
@ -725,6 +727,8 @@ __all__ = [
|
||||||
"ParamType",
|
"ParamType",
|
||||||
"parse_type",
|
"parse_type",
|
||||||
"PostTraining",
|
"PostTraining",
|
||||||
|
"ProcessedContent",
|
||||||
|
"ProcessFileRequest",
|
||||||
"PostTrainingMetric",
|
"PostTrainingMetric",
|
||||||
"PostTrainingJob",
|
"PostTrainingJob",
|
||||||
"PostTrainingJobArtifactsResponse",
|
"PostTrainingJobArtifactsResponse",
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
|
||||||
:cvar benchmarks: Benchmark suite management
|
:cvar benchmarks: Benchmark suite management
|
||||||
:cvar tool_groups: Tool group organization
|
:cvar tool_groups: Tool group organization
|
||||||
:cvar files: File storage and management
|
:cvar files: File storage and management
|
||||||
|
:cvar file_processors: File parsing and processing operations
|
||||||
:cvar prompts: Prompt versions and management
|
:cvar prompts: Prompt versions and management
|
||||||
:cvar inspect: Built-in system inspection and introspection
|
:cvar inspect: Built-in system inspection and introspection
|
||||||
"""
|
"""
|
||||||
|
|
@ -134,6 +135,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
|
||||||
benchmarks = "benchmarks"
|
benchmarks = "benchmarks"
|
||||||
tool_groups = "tool_groups"
|
tool_groups = "tool_groups"
|
||||||
files = "files"
|
files = "files"
|
||||||
|
file_processors = "file_processors"
|
||||||
prompts = "prompts"
|
prompts = "prompts"
|
||||||
conversations = "conversations"
|
conversations = "conversations"
|
||||||
|
|
||||||
|
|
|
||||||
96
src/llama_stack_api/file_processors.py
Normal file
96
src/llama_stack_api/file_processors.py
Normal file
|
|
@ -0,0 +1,96 @@
|
||||||
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
|
# the root directory of this source tree.
|
||||||
|
|
||||||
|
from typing import Any, Protocol, runtime_checkable
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from .common.tracing import telemetry_traceable
|
||||||
|
from .schema_utils import json_schema_type, webmethod
|
||||||
|
from .vector_io import Chunk, VectorStoreChunkingStrategy
|
||||||
|
from .version import LLAMA_STACK_API_V1ALPHA
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
class ProcessFileRequest(BaseModel):
    """Request for processing a file into structured content."""

    # NOTE(review): the generated OpenAPI spec exposes this field as
    # `type: string, format: binary` inside an `application/json` request
    # body. Raw bytes are not JSON-safe in general -- confirm whether the
    # payload should be base64-encoded or sent as multipart/form-data.
    file_data: bytes
    """Raw file data to process."""

    filename: str
    """Original filename for format detection and processing hints."""

    options: dict[str, Any] | None = None
    """Optional processing options. Provider-specific parameters."""

    chunking_strategy: VectorStoreChunkingStrategy | None = None
    """Optional chunking strategy for splitting content into chunks."""

    include_embeddings: bool = False
    """Whether to generate embeddings for chunks."""
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
class ProcessedContent(BaseModel):
    """Result of file processing operation."""

    # `content` and `metadata` are required; `chunks` and `embeddings`
    # default to None when they are not supplied.
    content: str
    """Extracted text content from the file."""

    chunks: list[Chunk] | None = None
    """Optional chunks if chunking strategy was provided."""

    embeddings: list[list[float]] | None = None
    """Optional embeddings for chunks if requested."""

    metadata: dict[str, Any]
    """Processing metadata including processor name, timing, and provider-specific data."""
|
||||||
|
|
||||||
|
|
||||||
|
@telemetry_traceable
@runtime_checkable
class FileProcessors(Protocol):
    """
    File Processor API for converting files into structured, processable content.

    This API provides a flexible interface for processing various file formats
    (PDFs, documents, images, etc.) into text content that can be used for
    vector store ingestion, RAG applications, or standalone content extraction.

    The API supports:
    - Multiple file formats through extensible provider architecture
    - Configurable processing options per provider
    - Integration with vector store chunking strategies
    - Optional embedding generation for chunks
    - Rich metadata about processing results

    Future providers can extend this interface to support additional formats,
    processing capabilities, and optimization strategies.
    """

    # Protocol stub only: the body is `...`; concrete providers supply the
    # implementation. The route is registered at the v1alpha API level.
    @webmethod(route="/file-processors/process", method="POST", level=LLAMA_STACK_API_V1ALPHA)
    async def process_file(
        self,
        file_data: bytes,
        filename: str,
        options: dict[str, Any] | None = None,
        chunking_strategy: VectorStoreChunkingStrategy | None = None,
        include_embeddings: bool = False,
    ) -> ProcessedContent:
        """
        Process a file into structured content with optional chunking and embeddings.

        This method processes raw file data and converts it into text content for applications such as vector store ingestion.

        :param file_data: Raw bytes of the file to process.
        :param filename: Original filename for format detection.
        :param options: Provider-specific processing options (e.g., OCR settings, output format).
        :param chunking_strategy: Optional strategy for splitting content into chunks.
        :param include_embeddings: Whether to generate embeddings for chunks.
        :returns: ProcessedContent with extracted text, optional chunks, and metadata.
        """
        ...
|
||||||
Loading…
Add table
Add a link
Reference in a new issue