From 3f15349c9d9974acfaf7f656ba517f85911d02dc Mon Sep 17 00:00:00 2001 From: ilya-kolchinsky Date: Fri, 7 Mar 2025 16:08:14 +0100 Subject: [PATCH] Updated the configuration templates to include the builtin preprocessors. --- .../distributions/remote_hosted_distro/nvidia.md | 1 + .../distributions/self_hosted_distro/bedrock.md | 1 + .../distributions/self_hosted_distro/cerebras.md | 1 + .../distributions/self_hosted_distro/fireworks.md | 1 + .../distributions/self_hosted_distro/groq.md | 1 + .../self_hosted_distro/meta-reference-gpu.md | 1 + .../meta-reference-quantized-gpu.md | 1 + .../self_hosted_distro/remote-vllm.md | 1 + .../distributions/self_hosted_distro/sambanova.md | 1 + .../source/distributions/self_hosted_distro/tgi.md | 1 + .../distributions/self_hosted_distro/together.md | 1 + llama_stack/templates/bedrock/bedrock.py | 14 ++++++++++++++ llama_stack/templates/bedrock/build.yaml | 3 +++ llama_stack/templates/bedrock/run.yaml | 14 +++++++++++++- llama_stack/templates/cerebras/build.yaml | 3 +++ llama_stack/templates/cerebras/cerebras.py | 13 +++++++++++++ llama_stack/templates/cerebras/run.yaml | 14 +++++++++++++- llama_stack/templates/ci-tests/build.yaml | 3 +++ llama_stack/templates/ci-tests/ci_tests.py | 13 +++++++++++++ llama_stack/templates/ci-tests/run.yaml | 14 +++++++++++++- llama_stack/templates/dell/build.yaml | 3 +++ llama_stack/templates/dell/dell.py | 14 ++++++++++++++ llama_stack/templates/dell/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/dell/run.yaml | 14 +++++++++++++- llama_stack/templates/dev/build.yaml | 3 +++ llama_stack/templates/dev/dev.py | 13 +++++++++++++ llama_stack/templates/dev/run.yaml | 14 +++++++++++++- llama_stack/templates/fireworks/build.yaml | 3 +++ llama_stack/templates/fireworks/fireworks.py | 14 ++++++++++++++ .../templates/fireworks/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/fireworks/run.yaml | 14 +++++++++++++- llama_stack/templates/groq/build.yaml | 3 +++ llama_stack/templates/groq/groq.py | 13 +++++++++++++ llama_stack/templates/groq/run.yaml | 14 +++++++++++++- llama_stack/templates/hf-endpoint/build.yaml | 3 +++ llama_stack/templates/hf-endpoint/hf_endpoint.py | 14 ++++++++++++++ .../templates/hf-endpoint/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/hf-endpoint/run.yaml | 14 +++++++++++++- llama_stack/templates/hf-serverless/build.yaml | 3 +++ .../templates/hf-serverless/hf_serverless.py | 14 ++++++++++++++ .../templates/hf-serverless/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/hf-serverless/run.yaml | 14 +++++++++++++- .../templates/meta-reference-gpu/build.yaml | 3 +++ .../templates/meta-reference-gpu/meta_reference.py | 14 ++++++++++++++ .../meta-reference-gpu/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/meta-reference-gpu/run.yaml | 14 +++++++++++++- .../meta-reference-quantized-gpu/build.yaml | 3 +++ .../meta-reference-quantized-gpu/meta_reference.py | 13 +++++++++++++ .../meta-reference-quantized-gpu/run.yaml | 14 +++++++++++++- llama_stack/templates/nvidia/build.yaml | 3 +++ llama_stack/templates/nvidia/nvidia.py | 13 +++++++++++++ llama_stack/templates/nvidia/run.yaml | 14 +++++++++++++- llama_stack/templates/passthrough/build.yaml | 3 +++ llama_stack/templates/passthrough/run.yaml | 12 ++++++++++++ llama_stack/templates/remote-vllm/build.yaml | 3 +++ .../templates/remote-vllm/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/remote-vllm/run.yaml | 14 +++++++++++++- llama_stack/templates/remote-vllm/vllm.py | 14 ++++++++++++++ llama_stack/templates/sambanova/build.yaml | 3 +++ llama_stack/templates/sambanova/run.yaml | 14 +++++++++++++- llama_stack/templates/sambanova/sambanova.py | 13 +++++++++++++ llama_stack/templates/tgi/build.yaml | 3 +++ llama_stack/templates/tgi/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/tgi/run.yaml | 14 +++++++++++++- llama_stack/templates/tgi/tgi.py | 14 ++++++++++++++ llama_stack/templates/together/build.yaml | 3 +++ .../templates/together/run-with-safety.yaml | 14 +++++++++++++- llama_stack/templates/together/run.yaml | 14 +++++++++++++- llama_stack/templates/together/together.py | 14 ++++++++++++++ llama_stack/templates/vllm-gpu/build.yaml | 3 +++ llama_stack/templates/vllm-gpu/run.yaml | 14 +++++++++++++- llama_stack/templates/vllm-gpu/vllm.py | 13 +++++++++++++ 72 files changed, 632 insertions(+), 25 deletions(-) diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index efa0a2d74..a838cb8d3 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -9,6 +9,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::nvidia` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 623ab6848..03a6444b7 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -16,6 +16,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::bedrock` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `remote::bedrock` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index 8f14ae7cc..2c99d5399 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -9,6 +9,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::cerebras`, `inline::sentence-transformers` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index 1fcd6f7af..e78d6f7ed 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -19,6 +19,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::fireworks`, `inline::sentence-transformers` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md index ce3f8aecc..6b42f7442 100644 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ b/docs/source/distributions/self_hosted_distro/groq.md @@ -19,6 +19,7 @@ The `llamastack/distribution-groq` distribution consists of the following provid | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::groq` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b183757db..b69ba0df6 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -19,6 +19,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `inline::meta-reference` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 9aeb7a88b..c6032f4f9 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -19,6 +19,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `inline::meta-reference-quantized` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index 01f38807b..18de97934 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -18,6 +18,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::vllm`, `inline::sentence-transformers` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index a7f738261..cfdcad785 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -17,6 +17,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p |-----|-------------| | agents | `inline::meta-reference` | | inference | `remote::sambanova` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md index 80baf9c81..485ea5959 100644 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ b/docs/source/distributions/self_hosted_distro/tgi.md @@ -20,6 +20,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::tgi`, `inline::sentence-transformers` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index f361e93c7..2e3873fe8 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -19,6 +19,7 @@ The `llamastack/distribution-together` distribution consists of the following pr | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::together`, `inline::sentence-transformers` | +| preprocessing | `inline::basic`, `inline::simple_chunking` | | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 18e287390..e7443864b 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -6,6 +6,7 @@ from pathlib import Path +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES @@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "bedrock" vector_io_provider = Provider( @@ -57,6 +59,17 @@ def get_distribution_template() -> DistributionTemplate: ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] + return DistributionTemplate( name=name, distro_type="self_hosted", @@ -72,6 +85,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models, default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index 6c07b0478..477503321 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 1199a7a2d..285156c75 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -84,6 +85,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db @@ -130,6 +138,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index ef6c43212..180357e2a 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index bda22a498..1f6042ec9 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, @@ -33,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::code-interpreter", "inline::rag-runtime", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "cerebras" @@ -78,6 +80,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name="cerebras", @@ -96,6 +108,7 @@ def get_distribution_template() -> DistributionTemplate: default_models=default_models + [embedding_model], default_shields=[], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 0aa509181..468de5b37 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -86,6 +87,13 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db @@ -127,6 +135,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/ci-tests/build.yaml b/llama_stack/templates/ci-tests/build.yaml index a5c615f2f..d31c14beb 100644 --- a/llama_stack/templates/ci-tests/build.yaml +++ b/llama_stack/templates/ci-tests/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index 979256fa1..43d0344df 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -6,6 +6,7 @@ from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -38,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "ci-tests" inference_provider = Provider( @@ -70,6 +72,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] available_models = { "fireworks": MODEL_ENTRIES, } @@ -99,6 +111,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], ), }, diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index d40edc76a..237bbf0c9 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -86,6 +87,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db @@ -215,6 +223,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/dell/build.yaml b/llama_stack/templates/dell/build.yaml index 05b98d56f..baabf294a 100644 --- a/llama_stack/templates/dell/build.yaml +++ b/llama_stack/templates/dell/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 52c5a5476..32ab9af15 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -5,6 +5,7 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -33,6 +34,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::code-interpreter", "inline::rag-runtime", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "dell" inference_provider = Provider( @@ -92,6 +94,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -107,6 +119,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -120,6 +133,7 @@ def get_distribution_template() -> DistributionTemplate: default_models=[inference_model, safety_model, embedding_model], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index b6a308061..7b451c00c 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -86,6 +87,13 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db @@ -116,6 +124,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index bd67a1204..8aa1d7e2d 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -82,6 +83,13 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dell}/registry.db @@ -107,6 +115,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/dev/build.yaml b/llama_stack/templates/dev/build.yaml index 726ebccca..72323901d 100644 --- a/llama_stack/templates/dev/build.yaml +++ b/llama_stack/templates/dev/build.yaml @@ -34,4 +34,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py index 694913119..811a289dd 100644 --- a/llama_stack/templates/dev/dev.py +++ b/llama_stack/templates/dev/dev.py @@ -7,6 +7,7 @@ from typing import List, Tuple from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -91,6 +92,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "dev" @@ -119,6 +121,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id=embedding_provider.provider_id, @@ -145,6 +157,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], ), }, diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index e555a97d5..8ef7043e3 100644 --- a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -103,6 +104,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/dev}/registry.db @@ -359,6 +367,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index a9c472c53..8893365e2 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index 0111bc118..002c464a0 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -39,6 +40,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "fireworks" @@ -86,6 +88,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -104,6 +116,7 @@ def get_distribution_template() -> DistributionTemplate: default_models=default_models + [embedding_model], default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -149,6 +162,7 @@ def get_distribution_template() -> DistributionTemplate: ), ], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index a79638e89..e6d104290 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -95,6 +96,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db @@ -229,6 +237,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 197be9891..e2da25431 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -89,6 +90,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db @@ -218,6 +226,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/groq/build.yaml b/llama_stack/templates/groq/build.yaml index 3263ce83b..ddc922aa7 100644 --- a/llama_stack/templates/groq/build.yaml +++ b/llama_stack/templates/groq/build.yaml @@ -26,4 +26,7 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index 71c504cde..1c312b7d2 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::code-interpreter", "inline::rag-runtime", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "groq" @@ -77,6 +79,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -93,6 +105,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 2427dcdb5..4a22e9f58 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -86,6 +87,13 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db @@ -152,6 +160,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index c2eaaa05b..9a8cb0768 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index f2849f0bc..8e16ae6c4 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -5,6 +5,7 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "hf-endpoint" inference_provider = Provider( @@ -84,6 +86,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -100,6 +112,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -123,6 +136,7 @@ def get_distribution_template() -> DistributionTemplate: ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 1158675ee..52adca856 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -94,6 +95,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db @@ -124,6 +132,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 5ee5b0f37..6cf2f04ea 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -89,6 +90,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db @@ -114,6 +122,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index c0cc1e2c2..614275115 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index cea1075e2..58b8cd3d5 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -5,6 +5,7 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -36,6 +37,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "hf-serverless" @@ -85,6 +87,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -101,6 +113,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -124,6 +137,7 @@ def get_distribution_template() -> DistributionTemplate: ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index 51ee1b1c9..8ce968fe8 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -94,6 +95,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db @@ -124,6 +132,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index d08456f54..01ddc9910 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -89,6 +90,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db @@ -114,6 +122,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index b9130fc7d..42868c4bc 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 3c38e0edd..6b50c5f87 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -40,6 +41,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "meta-reference-gpu" inference_provider = Provider( @@ -91,6 +93,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -106,6 +118,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -130,6 +143,7 @@ def get_distribution_template() -> DistributionTemplate: ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index b78fe96ce..6bd4f8347 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -96,6 +97,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db @@ -126,6 +134,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 8f20482aa..9dc11c030 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -90,6 +91,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db @@ -115,6 +123,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index 7bbcfe5f2..bf85a8040 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index 32476f37f..cb9c8c04f 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceQuantizedInferenceConfig, @@ -35,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } default_tool_groups = [ ToolGroupInput( @@ -50,6 +52,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] name = "meta-reference-quantized-gpu" inference_provider = Provider( provider_id="meta-reference-inference", @@ -96,6 +108,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index 277d97ca3..8be8f2a9c 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -92,6 +93,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db @@ -117,6 +125,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index e9748721a..d7752d67b 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -27,4 +27,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index cc5e96333..fb5b969d9 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -6,6 +6,7 @@ from pathlib import Path +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES @@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } inference_provider = Provider( @@ -54,6 +56,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] default_models = get_model_registry(available_models) return DistributionTemplate( @@ -71,6 +83,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models, default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 1720ab942..b83127dd7 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -86,6 +87,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db @@ -220,6 +228,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/passthrough/build.yaml b/llama_stack/templates/passthrough/build.yaml index 5fed5286e..b81c54042 100644 --- a/llama_stack/templates/passthrough/build.yaml +++ b/llama_stack/templates/passthrough/build.yaml @@ -29,4 +29,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 2548faa5d..7fac4bf50 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -89,6 +89,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-llama}/registry.db @@ -116,5 +123,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index ccb328c1c..5fc6eed60 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 059802d64..cc4729505 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -96,6 +97,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db @@ -126,6 +134,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 997572afa..236122c1a 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -90,6 +91,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db @@ -115,6 +123,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 73ee36c3f..bce1e0fe4 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -38,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "remote-vllm" inference_provider = Provider( @@ -88,6 +90,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -103,6 +115,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -126,6 +139,7 @@ def get_distribution_template() -> DistributionTemplate: ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index ca5ffe618..a518d0c51 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -19,4 +19,7 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 1cf2bcfbb..111f7f948 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -3,6 +3,7 @@ image_name: sambanova apis: - agents - inference +- preprocessing - safety - telemetry - tool_runtime @@ -64,6 +65,13 @@ providers: - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db @@ -171,6 +179,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 0a0b6bd7e..b77c04fbf 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -6,6 +6,7 @@ from pathlib import Path +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( Provider, ShieldInput, @@ -29,6 +30,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::code-interpreter", "inline::rag-runtime", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "sambanova" @@ -56,6 +58,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -73,6 +85,7 @@ def get_distribution_template() -> DistributionTemplate: default_models=default_models, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index 9fe79647c..a13b8cd7c 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index d833635b9..3e5199656 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -89,6 +90,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db @@ -114,6 +122,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 851a76849..68a39e6bf 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -88,6 +89,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db @@ -113,6 +121,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index eb49871a0..8327e904c 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -38,6 +39,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "tgi" inference_provider = Provider( @@ -88,6 +90,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -104,6 +116,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -125,6 +138,7 @@ def get_distribution_template() -> DistributionTemplate: ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index a8a6de28d..037273335 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 9c702d9bb..67cc26954 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -95,6 +96,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db @@ -226,6 +234,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index e6342ab3f..380f34a1d 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -89,6 +90,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db @@ -215,6 +223,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 24c395e1e..0bc70449a 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -7,6 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ( ModelInput, Provider, @@ -39,6 +40,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "together" inference_provider = Provider( @@ -74,6 +76,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -99,6 +111,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], ), "run-with-safety.yaml": RunConfigSettings( @@ -145,6 +158,7 @@ def get_distribution_template() -> DistributionTemplate: ), ], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index 8eb44dc1b..d91d32c40 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -30,4 +30,7 @@ distribution_spec: - inline::code-interpreter - inline::rag-runtime - remote::model-context-protocol + preprocessing: + - inline::basic + - inline::simple_chunking image_type: conda diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 1323ac6d6..151b47022 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -5,6 +5,7 @@ apis: - datasetio - eval - inference +- preprocessing - safety - scoring - telemetry @@ -92,6 +93,13 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} + preprocessing: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: simple_chunking + provider_type: inline::simple_chunking + config: {} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db @@ -117,6 +125,10 @@ tool_groups: provider_id: rag-runtime - toolgroup_id: builtin::code_interpreter provider_id: code-interpreter -preprocessors: [] +preprocessors: +- preprocessor_id: builtin::basic + provider_id: basic +- preprocessor_id: builtin::chunking + provider_id: simple_chunking server: port: 8321 diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index 27a16b93d..ad609186c 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -5,6 +5,7 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType +from llama_stack.apis.preprocessing.preprocessors import PreprocessorInput from llama_stack.distribution.datatypes import ModelInput, Provider from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, @@ -35,6 +36,7 @@ def get_distribution_template() -> DistributionTemplate: "inline::rag-runtime", "remote::model-context-protocol", ], + "preprocessing": ["inline::basic", "inline::simple_chunking"], } name = "vllm-gpu" @@ -80,6 +82,16 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] + default_preprocessors = [ + PreprocessorInput( + preprocessor_id="builtin::basic", + provider_id="basic", + ), + PreprocessorInput( + preprocessor_id="builtin::chunking", + provider_id="simple_chunking", + ), + ] return DistributionTemplate( name=name, @@ -96,6 +108,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, + default_preprocessors=default_preprocessors, ), }, run_config_env_vars={