mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-27 21:00:25 +00:00
feat: Add synthetic-data-kit for file_search doc conversion
This adds a `builtin::document_conversion` tool, backed by meta-llama/synthetic-data-kit, for converting documents when used with file_search. I also have another local implementation that uses Docling, but I need to debug some segfault issues I'm hitting locally with it, so I'm pushing this first as a simpler reference implementation. Long-term I think we'll want a remote implementation here as well - perhaps docling-serve or unstructured.io - but I need to look more into that. This passes the existing `tests/verifications/openai_api/test_responses.py` but doesn't yet add any new tests for file types besides text and PDF. Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
9baa16e498
commit
8bf1d91d38
18 changed files with 230 additions and 18 deletions
|
|
@ -38,6 +38,7 @@ distribution_spec:
|
|||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- inline::synthetic-data-kit
|
||||
- remote::model-context-protocol
|
||||
image_type: conda
|
||||
additional_pip_packages:
|
||||
|
|
|
|||
|
|
@ -163,6 +163,9 @@ providers:
|
|||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: synthetic-data-kit
|
||||
provider_type: inline::synthetic-data-kit
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
|
|
@ -822,5 +825,7 @@ tool_groups:
|
|||
provider_id: tavily-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
- toolgroup_id: builtin::document_conversion
|
||||
provider_id: synthetic-data-kit
|
||||
server:
|
||||
port: 8321
|
||||
|
|
|
|||
|
|
@ -163,6 +163,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"inline::synthetic-data-kit",
|
||||
"remote::model-context-protocol",
|
||||
],
|
||||
}
|
||||
|
|
@ -214,6 +215,10 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
toolgroup_id="builtin::rag",
|
||||
provider_id="rag-runtime",
|
||||
),
|
||||
ToolGroupInput(
|
||||
toolgroup_id="builtin::document_conversion",
|
||||
provider_id="synthetic-data-kit",
|
||||
),
|
||||
]
|
||||
embedding_model = ModelInput(
|
||||
model_id="all-MiniLM-L6-v2",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue