diff --git a/distributions/dependencies.json b/distributions/dependencies.json index 7a974b917..bd363ea40 100644 --- a/distributions/dependencies.json +++ b/distributions/dependencies.json @@ -23,6 +23,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -54,6 +55,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -86,6 +88,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -116,6 +119,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -148,6 +152,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -181,6 +186,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -213,6 +219,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -247,6 +254,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentence-transformers", @@ -286,6 +294,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentence-transformers", @@ -319,6 +328,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -352,6 +362,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", @@ -385,6 +396,7 @@ "psycopg2-binary", "pypdf", "redis", + "requests", "scikit-learn", "scipy", "sentencepiece", diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 7dab23655..db4c7a8c9 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -19,6 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro | safety | `remote::bedrock` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index a8886d39b..f623ed0de 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -9,6 +9,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr | memory | `inline::meta-reference` | | safety | `inline::llama-guard` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | ### Environment Variables diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index a78b0ee3f..c5428306a 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -22,6 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | ### Environment Variables diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index d46039318..0ca58e7df 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs. diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 837be744a..87f4f4a61 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc. diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index c915a7ac3..7fe2ae408 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -22,6 +22,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.### Environment Variables diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md index 27f917055..e751567ce 100644 --- a/docs/source/distributions/self_hosted_distro/remote-vllm.md +++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md @@ -18,6 +18,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | | safety | `inline::llama-guard` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference. diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md index 84b91da38..847018809 100644 --- a/docs/source/distributions/self_hosted_distro/tgi.md +++ b/docs/source/distributions/self_hosted_distro/tgi.md @@ -23,6 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference. diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 856fd264f..72b082226 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -22,6 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr | safety | `inline::llama-guard` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` | ### Environment Variables diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/llama_stack/providers/remote/tool_runtime/brave_search/config.py index 565d428f7..ab6053609 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional +from typing import Any, Dict, Optional from pydantic import BaseModel, Field @@ -18,3 +18,10 @@ class BraveSearchToolConfig(BaseModel): default=3, description="The maximum number of results to return", ) + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + return { + "api_key": "${env.BRAVE_SEARCH_API_KEY:}", + "max_results": 3, + } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index f7a8f3f09..945430bb1 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Optional +from typing import Any, Dict, Optional from pydantic import BaseModel, Field @@ -18,3 +18,10 @@ class TavilySearchToolConfig(BaseModel): default=3, description="The maximum number of results to return", ) + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + return { + "api_key": "${env.TAVILY_SEARCH_API_KEY:}", + "max_results": 3, + } diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 0b5b7d90d..a579e5b7f 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -9,8 +9,7 @@ from pathlib import Path from llama_models.sku_list import all_registered_models from llama_stack.apis.models import ModelInput -from llama_stack.distribution.datatypes import Provider - +from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES from llama_stack.templates.template import DistributionTemplate, RunConfigSettings @@ -26,6 +25,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "bedrock" memory_provider = Provider( @@ -46,6 +51,20 @@ def get_distribution_template() -> DistributionTemplate: ) for m in MODEL_ALIASES ] + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -61,6 +80,7 @@ def get_distribution_template() -> DistributionTemplate: "memory": [memory_provider], }, default_models=default_models, + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/bedrock/build.yaml b/llama_stack/templates/bedrock/build.yaml index cd36c320e..a68a8f6fc 100644 --- a/llama_stack/templates/bedrock/build.yaml +++ b/llama_stack/templates/bedrock/build.yaml @@ -2,7 +2,6 @@ version: '2' name: bedrock distribution_spec: description: Use AWS Bedrock for running LLM inference and safety - docker_image: null providers: inference: - remote::bedrock @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 9aa5ca914..1d0721773 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: bedrock -docker_image: null conda_env: bedrock apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: bedrock @@ -65,8 +65,24 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db models: @@ -90,3 +106,10 @@ memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index a1fe93099..307e0303a 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -2,7 +2,6 @@ version: '2' name: cerebras distribution_spec: description: Use Cerebras for running LLM inference - docker_image: null providers: inference: - remote::cerebras @@ -14,4 +13,9 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 9acb244bd..cbacdbaec 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -9,8 +9,12 @@ from pathlib import Path from llama_models.sku_list import all_registered_models from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -26,6 +30,12 @@ def get_distribution_template() -> DistributionTemplate: "memory": ["inline::meta-reference"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } inference_provider = Provider( @@ -58,6 +68,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name="cerebras", @@ -74,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 05b21bf0a..e06b17a50 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: cerebras -docker_image: null conda_env: cerebras apis: - agents @@ -8,6 +7,7 @@ apis: - memory - safety - telemetry +- tool_runtime providers: inference: - provider_id: cerebras @@ -45,8 +45,24 @@ providers: service_name: ${env.OTEL_SERVICE_NAME:llama-stack} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db models: @@ -64,14 +80,17 @@ models: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: meta-llama/Llama-Guard-3-8B - provider_id: null - provider_shield_id: null +- shield_id: meta-llama/Llama-Guard-3-8B memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/fireworks/build.yaml b/llama_stack/templates/fireworks/build.yaml index 30ea347ae..e76cc86f1 100644 --- a/llama_stack/templates/fireworks/build.yaml +++ b/llama_stack/templates/fireworks/build.yaml @@ -2,7 +2,6 @@ version: '2' name: fireworks distribution_spec: description: Use Fireworks.AI for running LLM inference - docker_image: null providers: inference: - remote::fireworks @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index cbcac0f92..090f98b59 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -9,8 +9,12 @@ from pathlib import Path from llama_models.sku_list import all_registered_models from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "fireworks" @@ -69,6 +79,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -86,6 +110,7 @@ def get_distribution_template() -> DistributionTemplate: }, default_models=default_models + [embedding_model], default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 99f155a4a..444679da7 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: fireworks -docker_image: null conda_env: fireworks apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: fireworks @@ -70,8 +70,24 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: @@ -129,14 +145,17 @@ models: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: meta-llama/Llama-Guard-3-8B - provider_id: null - provider_shield_id: null +- shield_id: meta-llama/Llama-Guard-3-8B memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/hf-endpoint/build.yaml b/llama_stack/templates/hf-endpoint/build.yaml index 523cf5d83..c18689855 100644 --- a/llama_stack/templates/hf-endpoint/build.yaml +++ b/llama_stack/templates/hf-endpoint/build.yaml @@ -2,7 +2,6 @@ version: '2' name: hf-endpoint distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference - docker_image: null providers: inference: - remote::hf::endpoint @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 404440be6..8bac2588d 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -5,7 +5,12 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "hf-endpoint" inference_provider = Provider( @@ -58,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -74,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate: "memory": [memory_provider], }, default_models=[inference_model, embedding_model], + default_tool_groups=default_tool_groups, ), "run-with-safety.yaml": RunConfigSettings( provider_overrides={ @@ -96,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate: embedding_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 8e566de9a..a9d895d23 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: hf-endpoint -docker_image: null conda_env: hf-endpoint apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: hf-endpoint @@ -75,33 +75,50 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: hf-endpoint - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: hf-endpoint-safety - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index c1b3a64d0..e9b58c962 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: hf-endpoint -docker_image: null conda_env: hf-endpoint apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: hf-endpoint @@ -70,24 +70,45 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: hf-endpoint - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/hf-serverless/build.yaml b/llama_stack/templates/hf-serverless/build.yaml index af7eb60fe..a6b551e4a 100644 --- a/llama_stack/templates/hf-serverless/build.yaml +++ b/llama_stack/templates/hf-serverless/build.yaml @@ -2,7 +2,6 @@ version: '2' name: hf-serverless distribution_spec: description: Use (an external) Hugging Face Inference Endpoint for running LLM inference - docker_image: null providers: inference: - remote::hf::serverless @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index 63b423412..33eb594fe 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -5,7 +5,12 @@ # the root directory of this source tree. from llama_stack.apis.models.models import ModelType -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "hf-serverless" @@ -59,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -97,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate: embedding_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index 2b24ab074..415cec648 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: hf-serverless -docker_image: null conda_env: hf-serverless apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: hf-serverless @@ -75,33 +75,50 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: hf-serverless - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: hf-serverless-safety - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 394d689da..ef9dedeed 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: hf-serverless -docker_image: null conda_env: hf-serverless apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: hf-serverless @@ -70,24 +70,39 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: hf-serverless - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: [] diff --git a/llama_stack/templates/meta-reference-gpu/build.yaml b/llama_stack/templates/meta-reference-gpu/build.yaml index 300b75b14..ba8413fa6 100644 --- a/llama_stack/templates/meta-reference-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-gpu/build.yaml @@ -2,7 +2,6 @@ version: '2' name: meta-reference-gpu distribution_spec: description: Use Meta Reference for running LLM inference - docker_image: null providers: inference: - inline::meta-reference @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 461d89a4a..8ad56d7f5 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -7,8 +7,12 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceInferenceConfig, ) @@ -29,6 +33,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "meta-reference-gpu" inference_provider = Provider( @@ -66,6 +76,20 @@ def get_distribution_template() -> DistributionTemplate: model_id="${env.SAFETY_MODEL}", provider_id="meta-reference-safety", ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -104,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate: embedding_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index deb6c4a91..4946fdab7 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: meta-reference-gpu -docker_image: null conda_env: meta-reference-gpu apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: meta-reference-inference @@ -77,33 +77,50 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: meta-reference-inference - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: meta-reference-safety - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index c19066664..52345f3c1 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: meta-reference-gpu -docker_image: null conda_env: meta-reference-gpu apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: meta-reference-inference @@ -71,24 +71,39 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: meta-reference-inference - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: [] diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index 9d866de18..41ab44e38 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -2,7 +2,6 @@ version: '2' name: meta-reference-quantized-gpu distribution_spec: description: Use Meta Reference with fp8, int4 quantization for running LLM inference - docker_image: null providers: inference: - inline::meta-reference-quantized @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index c460860c5..6af7175f7 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -7,8 +7,7 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider +from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.meta_reference import ( MetaReferenceQuantizedInferenceConfig, ) @@ -29,7 +28,27 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] name = "meta-reference-quantized-gpu" inference_provider = Provider( provider_id="meta-reference-inference", @@ -76,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate: "memory": [memory_provider], }, default_models=[inference_model, embedding_model], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index 550170a00..02a5bacaa 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: meta-reference-quantized-gpu -docker_image: null conda_env: meta-reference-quantized-gpu apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: meta-reference-inference @@ -73,24 +73,45 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: meta-reference-inference - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index a021e4993..cbd9101cf 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -2,7 +2,6 @@ version: '2' name: ollama distribution_spec: description: Use (an external) Ollama server for running LLM inference - docker_image: null providers: inference: - remote::ollama @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 1e3180a77..9a76e9371 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -7,8 +7,12 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "ollama" inference_provider = Provider( @@ -61,6 +71,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -92,6 +116,7 @@ def get_distribution_template() -> DistributionTemplate: embedding_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 100886c95..96cb1d668 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: ollama -docker_image: null conda_env: ollama apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: ollama @@ -69,33 +69,50 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: ollama - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index bcbed3e6e..176465299 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: ollama -docker_image: null conda_env: ollama apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: ollama @@ -69,24 +69,39 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: [] diff --git a/llama_stack/templates/remote-vllm/build.yaml b/llama_stack/templates/remote-vllm/build.yaml index 9f4597cb0..246e53db0 100644 --- a/llama_stack/templates/remote-vllm/build.yaml +++ b/llama_stack/templates/remote-vllm/build.yaml @@ -2,7 +2,6 @@ version: '2' name: remote-vllm distribution_spec: description: Use (an external) vLLM server for running LLM inference - docker_image: null providers: inference: - remote::vllm @@ -16,4 +15,9 @@ distribution_spec: - inline::meta-reference telemetry: - inline::meta-reference + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 7097bc649..1babd04ac 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: remote-vllm -docker_image: null conda_env: remote-vllm apis: - agents @@ -8,6 +7,7 @@ apis: - memory - safety - telemetry +- tool_runtime providers: inference: - provider_id: vllm-inference @@ -52,33 +52,50 @@ providers: service_name: ${env.OTEL_SERVICE_NAME:llama-stack} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: vllm-inference - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: vllm-safety - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index c957b05d0..a3a571423 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: remote-vllm -docker_image: null conda_env: remote-vllm apis: - agents @@ -8,6 +7,7 @@ apis: - memory - safety - telemetry +- tool_runtime providers: inference: - provider_id: vllm-inference @@ -46,24 +46,39 @@ providers: service_name: ${env.OTEL_SERVICE_NAME:llama-stack} sinks: ${env.TELEMETRY_SINKS:console,sqlite} sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: vllm-inference - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: [] diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index e4c948fbf..f12752f2b 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -7,8 +7,12 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -24,6 +28,12 @@ def get_distribution_template() -> DistributionTemplate: "safety": ["inline::llama-guard"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "remote-vllm" inference_provider = Provider( @@ -60,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -97,6 +121,7 @@ def get_distribution_template() -> DistributionTemplate: embedding_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 0ec8c1f09..5bb88c821 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -20,6 +20,7 @@ from llama_stack.distribution.datatypes import ( Provider, ShieldInput, StackRunConfig, + ToolGroupInput, ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type @@ -30,6 +31,7 @@ class RunConfigSettings(BaseModel): provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) default_models: Optional[List[ModelInput]] = None default_shields: Optional[List[ShieldInput]] = None + default_tool_groups: Optional[List[ToolGroupInput]] = None def run_config( self, @@ -91,6 +93,7 @@ class RunConfigSettings(BaseModel): ), models=self.default_models or [], shields=self.default_shields or [], + tool_groups=self.default_tool_groups or [], ) @@ -159,14 +162,22 @@ class DistributionTemplate(BaseModel): build_config = self.build_config() with open(yaml_output_dir / "build.yaml", "w") as f: - yaml.safe_dump(build_config.model_dump(), f, sort_keys=False) + yaml.safe_dump( + build_config.model_dump(exclude_none=True), + f, + sort_keys=False, + ) for yaml_pth, settings in self.run_configs.items(): run_config = settings.run_config( self.name, self.providers, self.docker_image ) with open(yaml_output_dir / yaml_pth, "w") as f: - yaml.safe_dump(run_config.model_dump(), f, sort_keys=False) + yaml.safe_dump( + run_config.model_dump(exclude_none=True), + f, + sort_keys=False, + ) if self.template_path: docs = self.generate_markdown_docs() diff --git a/llama_stack/templates/tgi/build.yaml b/llama_stack/templates/tgi/build.yaml index d90b505df..399d4a616 100644 --- a/llama_stack/templates/tgi/build.yaml +++ b/llama_stack/templates/tgi/build.yaml @@ -2,7 +2,6 @@ version: '2' name: tgi distribution_spec: description: Use (an external) TGI server for running LLM inference - docker_image: null providers: inference: - remote::tgi @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index ef8344a7a..4134101f6 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -1,6 +1,5 @@ version: '2' image_name: tgi -docker_image: null conda_env: tgi apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: tgi-inference @@ -70,27 +70,45 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: tgi-inference - provider_model_id: null model_type: llm - metadata: {} model_id: ${env.SAFETY_MODEL} provider_id: tgi-safety - provider_model_id: null model_type: llm shields: -- params: null - shield_id: ${env.SAFETY_MODEL} - provider_id: null - provider_shield_id: null +- shield_id: ${env.SAFETY_MODEL} memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index 22c08d1d3..b0b78e33b 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: tgi -docker_image: null conda_env: tgi apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: tgi-inference @@ -69,24 +69,39 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: tgi-inference - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: [] diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index c84f5b5fe..892d539d2 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -7,8 +7,12 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "tgi" inference_provider = Provider( @@ -63,6 +73,20 @@ def get_distribution_template() -> DistributionTemplate: model_id="${env.SAFETY_MODEL}", provider_id="tgi-safety", ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -99,6 +123,7 @@ def get_distribution_template() -> DistributionTemplate: safety_model, ], default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={ diff --git a/llama_stack/templates/together/build.yaml b/llama_stack/templates/together/build.yaml index 6930b7692..96f9f758e 100644 --- a/llama_stack/templates/together/build.yaml +++ b/llama_stack/templates/together/build.yaml @@ -2,7 +2,6 @@ version: '2' name: together distribution_spec: description: Use Together.AI for running LLM inference - docker_image: null providers: inference: - remote::together @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 44e33662b..ed65ded57 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: together -docker_image: null conda_env: together apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: together @@ -70,8 +70,24 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: @@ -124,14 +140,17 @@ models: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: -- params: null - shield_id: meta-llama/Llama-Guard-3-8B - provider_id: null - provider_shield_id: null +- shield_id: meta-llama/Llama-Guard-3-8B memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 994cf5549..d73e23e77 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -9,8 +9,12 @@ from pathlib import Path from llama_models.sku_list import all_registered_models from llama_stack.apis.models.models import ModelType - -from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput +from llama_stack.distribution.datatypes import ( + ModelInput, + Provider, + ShieldInput, + ToolGroupInput, +) from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } name = "together" inference_provider = Provider( @@ -59,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate: ) for m in MODEL_ALIASES ] + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -83,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate: "memory": [memory_provider], }, default_models=default_models + [embedding_model], + default_tool_groups=default_tool_groups, default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")], ), }, diff --git a/llama_stack/templates/vllm-gpu/build.yaml b/llama_stack/templates/vllm-gpu/build.yaml index 4289296ec..959f91d3e 100644 --- a/llama_stack/templates/vllm-gpu/build.yaml +++ b/llama_stack/templates/vllm-gpu/build.yaml @@ -2,7 +2,6 @@ version: '2' name: vllm-gpu distribution_spec: description: Use a built-in vLLM engine for running LLM inference - docker_image: null providers: inference: - inline::vllm @@ -25,4 +24,9 @@ distribution_spec: - inline::basic - inline::llm-as-judge - inline::braintrust + tool_runtime: + - remote::brave-search + - remote::tavily-search + - inline::code-interpreter + - inline::memory-runtime image_type: conda diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 171f25d63..48ec57cfb 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -1,6 +1,5 @@ version: '2' image_name: vllm-gpu -docker_image: null conda_env: vllm-gpu apis: - agents @@ -11,6 +10,7 @@ apis: - safety - scoring - telemetry +- tool_runtime providers: inference: - provider_id: vllm @@ -73,24 +73,45 @@ providers: provider_type: inline::braintrust config: openai_api_key: ${env.OPENAI_API_KEY:} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:} + max_results: 3 + - provider_id: code-interpreter + provider_type: inline::code-interpreter + config: {} + - provider_id: memory-runtime + provider_type: inline::memory-runtime + config: {} metadata_store: - namespace: null type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: vllm - provider_model_id: null model_type: llm - metadata: embedding_dimension: 384 model_id: all-MiniLM-L6-v2 provider_id: sentence-transformers - provider_model_id: null model_type: embedding shields: [] memory_banks: [] datasets: [] scoring_fns: [] eval_tasks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::memory + provider_id: memory-runtime +- toolgroup_id: builtin::code_interpreter + provider_id: code-interpreter diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py index fe6fb7186..5cf478990 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -11,7 +11,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import ( ) from llama_stack.providers.inline.inference.vllm import VLLMConfig from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import ( + DistributionTemplate, + RunConfigSettings, + ToolGroupInput, +) def get_distribution_template() -> DistributionTemplate: @@ -24,7 +28,14 @@ def get_distribution_template() -> DistributionTemplate: "eval": ["inline::meta-reference"], "datasetio": ["remote::huggingface", "inline::localfs"], "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"], + "tool_runtime": [ + "remote::brave-search", + "remote::tavily-search", + "inline::code-interpreter", + "inline::memory-runtime", + ], } + name = "vllm-gpu" inference_provider = Provider( provider_id="vllm", @@ -54,6 +65,20 @@ def get_distribution_template() -> DistributionTemplate: "embedding_dimension": 384, }, ) + default_tool_groups = [ + ToolGroupInput( + toolgroup_id="builtin::websearch", + provider_id="tavily-search", + ), + ToolGroupInput( + toolgroup_id="builtin::memory", + provider_id="memory-runtime", + ), + ToolGroupInput( + toolgroup_id="builtin::code_interpreter", + provider_id="code-interpreter", + ), + ] return DistributionTemplate( name=name, @@ -70,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate: "memory": [memory_provider], }, default_models=[inference_model, embedding_model], + default_tool_groups=default_tool_groups, ), }, run_config_env_vars={