all templates to include toolgroups and tool runtime

This commit is contained in:
Dinesh Yeduguru 2025-01-08 15:28:03 -08:00
parent 67b35613bb
commit edcfd66be3
55 changed files with 854 additions and 145 deletions


@@ -23,6 +23,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -54,6 +55,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -86,6 +88,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -116,6 +119,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -148,6 +152,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -181,6 +186,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -213,6 +219,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -247,6 +254,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentence-transformers",
@@ -286,6 +294,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentence-transformers",
@@ -319,6 +328,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -352,6 +362,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",
@@ -385,6 +396,7 @@
     "psycopg2-binary",
     "pypdf",
     "redis",
+    "requests",
     "scikit-learn",
     "scipy",
     "sentencepiece",


@@ -19,6 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
 | safety | `remote::bedrock` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |


@@ -9,6 +9,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 | memory | `inline::meta-reference` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 ### Environment Variables


@@ -22,6 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 ### Environment Variables


@@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 Note that you need access to nvidia GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs.


@@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 The only difference vs. the `meta-reference-gpu` distribution is that it has support for more efficient inference -- with fp8, int4 quantization, etc.


@@ -22,6 +22,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still continue using this distribution since Ollama supports GPU acceleration.
 ### Environment Variables


@@ -18,6 +18,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
 | memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.


@@ -23,6 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.


@@ -22,6 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
 | safety | `inline::llama-guard` |
 | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
 ### Environment Variables


@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Optional
+from typing import Any, Dict, Optional

 from pydantic import BaseModel, Field
@@ -18,3 +18,10 @@ class BraveSearchToolConfig(BaseModel):
         default=3,
         description="The maximum number of results to return",
     )
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
+        return {
+            "api_key": "${env.BRAVE_SEARCH_API_KEY:}",
+            "max_results": 3,
+        }
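
The new `sample_run_config` classmethods are what let the template generator emit a ready-made `config:` stanza for each tool runtime provider in the run.yamls below. The `${env.NAME:default}` placeholders they return defer secrets to launch time; the text after the colon is the fallback (empty here). A minimal sketch of that substitution rule, assuming the syntax shown in the generated files (the real resolution happens in the stack's config loader, not in this snippet):

import os
import re

# Matches ${env.NAME:default}; group 1 is the variable, group 2 the fallback.
_ENV_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):([^}]*)\}")


def resolve_env_placeholders(value: str) -> str:
    return _ENV_PLACEHOLDER.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)


# Prints "" unless BRAVE_SEARCH_API_KEY is exported in the environment.
print(resolve_env_placeholders("${env.BRAVE_SEARCH_API_KEY:}"))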


@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Optional
+from typing import Any, Dict, Optional

 from pydantic import BaseModel, Field
@@ -18,3 +18,10 @@ class TavilySearchToolConfig(BaseModel):
         default=3,
         description="The maximum number of results to return",
     )
+
+    @classmethod
+    def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
+        return {
+            "api_key": "${env.TAVILY_SEARCH_API_KEY:}",
+            "max_results": 3,
+        }


@@ -9,8 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models

 from llama_stack.apis.models import ModelInput
-from llama_stack.distribution.datatypes import Provider
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -26,6 +25,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "bedrock"
     memory_provider = Provider(
@@ -46,6 +51,20 @@
         )
         for m in MODEL_ALIASES
     ]
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -61,6 +80,7 @@
                     "memory": [memory_provider],
                },
                default_models=default_models,
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
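
The `default_tool_groups` above are what surface as the `tool_groups:` section of the generated run.yaml further down. A self-contained sketch of that round trip, using a stand-in for `ToolGroupInput` reduced to the two fields the templates actually set (the real datatype in `llama_stack.distribution.datatypes` may carry more):

import yaml
from pydantic import BaseModel


class ToolGroupInput(BaseModel):  # stand-in, not the real class
    toolgroup_id: str
    provider_id: str


default_tool_groups = [
    ToolGroupInput(toolgroup_id="builtin::websearch", provider_id="tavily-search"),
    ToolGroupInput(toolgroup_id="builtin::memory", provider_id="memory-runtime"),
    ToolGroupInput(toolgroup_id="builtin::code_interpreter", provider_id="code-interpreter"),
]

# Prints the same entries that appear under `tool_groups:` in run.yaml
# (model_dump assumes pydantic v2).
print(yaml.safe_dump([tg.model_dump() for tg in default_tool_groups], sort_keys=False))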


@@ -2,7 +2,6 @@ version: '2'
 name: bedrock
 distribution_spec:
   description: Use AWS Bedrock for running LLM inference and safety
-  docker_image: null
   providers:
     inference:
     - remote::bedrock
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -1,6 +1,5 @@
 version: '2'
 image_name: bedrock
-docker_image: null
 conda_env: bedrock
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: bedrock
@@ -65,8 +65,24 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
@@ -90,3 +106,10 @@ memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
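
Because the generated config only references `${env.*}` placeholders, wiring in real credentials is a launch-time concern. A hedged example of driving that from Python, assuming the usual `llama stack run <template>` entrypoint; the key values are placeholders:

import os
import subprocess

env = dict(os.environ)
# Placeholders: with these unset the stack still boots (the ${env.VAR:}
# defaults are empty), but the search tool runtimes will not be usable.
env["BRAVE_SEARCH_API_KEY"] = "<your-brave-key>"
env["TAVILY_SEARCH_API_KEY"] = "<your-tavily-key>"

subprocess.run(["llama", "stack", "run", "bedrock"], env=env, check=True)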


@@ -2,7 +2,6 @@ version: '2'
 name: cerebras
 distribution_spec:
   description: Use Cerebras for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::cerebras
@@ -14,4 +13,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -9,8 +9,12 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -26,6 +30,12 @@ def get_distribution_template() -> DistributionTemplate:
         "memory": ["inline::meta-reference"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }

     inference_provider = Provider(
@@ -58,6 +68,20 @@
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name="cerebras",
@@ -74,6 +98,7 @@
                },
                default_models=default_models + [embedding_model],
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: cerebras
-docker_image: null
 conda_env: cerebras
 apis:
 - agents
@@ -8,6 +7,7 @@ apis:
 - memory
 - safety
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: cerebras
@@ -45,8 +45,24 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
@@ -64,14 +80,17 @@ models:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
+- shield_id: meta-llama/Llama-Guard-3-8B
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
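
The shields cleanup in these run.yamls is cosmetic: fields that were explicitly `null` are dropped, and since they default to `None` on the model, both spellings load to the same object. A quick check with a stand-in `ShieldInput` whose field set is inferred from the old YAML (the real class lives in `llama_stack.distribution.datatypes`):

from typing import Any, Dict, Optional

import yaml
from pydantic import BaseModel


class ShieldInput(BaseModel):  # stand-in, fields inferred from the old YAML
    shield_id: str
    provider_id: Optional[str] = None
    provider_shield_id: Optional[str] = None
    params: Optional[Dict[str, Any]] = None


old = yaml.safe_load(
    "- params: null\n"
    "  shield_id: meta-llama/Llama-Guard-3-8B\n"
    "  provider_id: null\n"
    "  provider_shield_id: null\n"
)
new = yaml.safe_load("- shield_id: meta-llama/Llama-Guard-3-8B\n")
assert ShieldInput(**old[0]) == ShieldInput(**new[0])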


@@ -2,7 +2,6 @@ version: '2'
 name: fireworks
 distribution_spec:
   description: Use Fireworks.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::fireworks
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -9,8 +9,12 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }

     name = "fireworks"
@@ -69,6 +79,20 @@
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -86,6 +110,7 @@
                },
                default_models=default_models + [embedding_model],
                default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: fireworks
-docker_image: null
 conda_env: fireworks
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: fireworks
@@ -70,8 +70,24 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
@@ -129,14 +145,17 @@ models:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
+- shield_id: meta-llama/Llama-Guard-3-8B
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -2,7 +2,6 @@ version: '2'
 name: hf-endpoint
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::hf::endpoint
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -5,7 +5,12 @@
 # the root directory of this source tree.

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "hf-endpoint"
     inference_provider = Provider(
@@ -58,6 +69,20 @@
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -74,6 +99,7 @@
                    "memory": [memory_provider],
                },
                default_models=[inference_model, embedding_model],
+                default_tool_groups=default_tool_groups,
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
@@ -96,6 +122,7 @@
                    embedding_model,
                ],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-docker_image: null
 conda_env: hf-endpoint
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: hf-endpoint
@@ -75,33 +75,50 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: hf-endpoint
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: hf-endpoint-safety
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-endpoint
-docker_image: null
 conda_env: hf-endpoint
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: hf-endpoint
@@ -70,24 +70,45 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: hf-endpoint
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -2,7 +2,6 @@ version: '2'
 name: hf-serverless
 distribution_spec:
   description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::hf::serverless
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -5,7 +5,12 @@
 # the root directory of this source tree.

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }

     name = "hf-serverless"
@@ -59,6 +70,20 @@
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -97,6 +122,7 @@
                    embedding_model,
                ],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-docker_image: null
 conda_env: hf-serverless
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: hf-serverless
@@ -75,33 +75,50 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: hf-serverless
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: hf-serverless-safety
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -1,6 +1,5 @@
 version: '2'
 image_name: hf-serverless
-docker_image: null
 conda_env: hf-serverless
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: hf-serverless
@@ -70,24 +70,39 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: hf-serverless
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []


@@ -2,7 +2,6 @@ version: '2'
 name: meta-reference-gpu
 distribution_spec:
   description: Use Meta Reference for running LLM inference
-  docker_image: null
   providers:
     inference:
     - inline::meta-reference
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -7,8 +7,12 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
@@ -29,6 +33,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "meta-reference-gpu"
     inference_provider = Provider(
@@ -66,6 +76,20 @@
         model_id="${env.SAFETY_MODEL}",
         provider_id="meta-reference-safety",
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -104,6 +128,7 @@
                    embedding_model,
                ],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-docker_image: null
 conda_env: meta-reference-gpu
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: meta-reference-inference
@@ -77,33 +77,50 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: meta-reference-inference
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: meta-reference-safety
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-gpu
-docker_image: null
 conda_env: meta-reference-gpu
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: meta-reference-inference
@@ -71,24 +71,39 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: meta-reference-inference
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []


@@ -2,7 +2,6 @@ version: '2'
 name: meta-reference-quantized-gpu
 distribution_spec:
   description: Use Meta Reference with fp8, int4 quantization for running LLM inference
-  docker_image: null
   providers:
     inference:
     - inline::meta-reference-quantized
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -7,8 +7,7 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider
+from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
@@ -29,7 +28,27 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
     name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
@@ -76,6 +95,7 @@
                    "memory": [memory_provider],
                },
                default_models=[inference_model, embedding_model],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: meta-reference-quantized-gpu
-docker_image: null
 conda_env: meta-reference-quantized-gpu
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: meta-reference-inference
@@ -73,24 +73,45 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: meta-reference-inference
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter


@@ -2,7 +2,6 @@ version: '2'
 name: ollama
 distribution_spec:
   description: Use (an external) Ollama server for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::ollama
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda


@@ -7,8 +7,12 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "ollama"
     inference_provider = Provider(
@@ -61,6 +71,20 @@
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -92,6 +116,7 @@
                    embedding_model,
                ],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={


@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-docker_image: null
 conda_env: ollama
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: ollama
@@ -69,33 +69,50 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: ollama
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter

@@ -1,6 +1,5 @@
 version: '2'
 image_name: ollama
-docker_image: null
 conda_env: ollama
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: ollama
@@ -69,24 +69,39 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: ollama
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
@@ -2,7 +2,6 @@ version: '2'
 name: remote-vllm
 distribution_spec:
   description: Use (an external) vLLM server for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::vllm
@@ -16,4 +15,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -8,6 +7,7 @@ apis:
 - memory
 - safety
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -52,33 +52,50 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: vllm-safety
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
@@ -1,6 +1,5 @@
 version: '2'
 image_name: remote-vllm
-docker_image: null
 conda_env: remote-vllm
 apis:
 - agents
@@ -8,6 +7,7 @@ apis:
 - memory
 - safety
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: vllm-inference
@@ -46,24 +46,39 @@ providers:
       service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
       sinks: ${env.TELEMETRY_SINKS:console,sqlite}
       sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm-inference
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
@@ -7,8 +7,12 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -24,6 +28,12 @@ def get_distribution_template() -> DistributionTemplate:
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "remote-vllm"
     inference_provider = Provider(
@@ -60,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -97,6 +121,7 @@ def get_distribution_template() -> DistributionTemplate:
                     embedding_model,
                 ],
                 default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
             ),
         },
         run_config_env_vars={
@@ -20,6 +20,7 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ShieldInput,
     StackRunConfig,
+    ToolGroupInput,
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
@@ -30,6 +31,7 @@ class RunConfigSettings(BaseModel):
     provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
     default_models: Optional[List[ModelInput]] = None
     default_shields: Optional[List[ShieldInput]] = None
+    default_tool_groups: Optional[List[ToolGroupInput]] = None

     def run_config(
         self,
@@ -91,6 +93,7 @@ class RunConfigSettings(BaseModel):
             ),
             models=self.default_models or [],
             shields=self.default_shields or [],
+            tool_groups=self.default_tool_groups or [],
         )

@@ -159,14 +162,22 @@ class DistributionTemplate(BaseModel):
         build_config = self.build_config()
         with open(yaml_output_dir / "build.yaml", "w") as f:
-            yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)
+            yaml.safe_dump(
+                build_config.model_dump(exclude_none=True),
+                f,
+                sort_keys=False,
+            )

         for yaml_pth, settings in self.run_configs.items():
             run_config = settings.run_config(
                 self.name, self.providers, self.docker_image
             )
             with open(yaml_output_dir / yaml_pth, "w") as f:
-                yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
+                yaml.safe_dump(
+                    run_config.model_dump(exclude_none=True),
+                    f,
+                    sort_keys=False,
+                )

         if self.template_path:
             docs = self.generate_markdown_docs()
@@ -2,7 +2,6 @@ version: '2'
 name: tgi
 distribution_spec:
   description: Use (an external) TGI server for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::tgi
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-docker_image: null
 conda_env: tgi
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: tgi-inference
@@ -70,27 +70,45 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: tgi-inference
-  provider_model_id: null
   model_type: llm
 - metadata: {}
   model_id: ${env.SAFETY_MODEL}
   provider_id: tgi-safety
-  provider_model_id: null
   model_type: llm
 shields:
-- params: null
-  shield_id: ${env.SAFETY_MODEL}
-  provider_id: null
-  provider_shield_id: null
+- shield_id: ${env.SAFETY_MODEL}
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
@@ -1,6 +1,5 @@
 version: '2'
 image_name: tgi
-docker_image: null
 conda_env: tgi
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: tgi-inference
@@ -69,24 +69,39 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: tgi-inference
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
@@ -7,8 +7,12 @@
 from pathlib import Path

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "tgi"
     inference_provider = Provider(
@@ -63,6 +73,20 @@ def get_distribution_template() -> DistributionTemplate:
         model_id="${env.SAFETY_MODEL}",
         provider_id="tgi-safety",
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -99,6 +123,7 @@ def get_distribution_template() -> DistributionTemplate:
                     safety_model,
                 ],
                 default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
+                default_tool_groups=default_tool_groups,
             ),
         },
         run_config_env_vars={
@@ -2,7 +2,6 @@ version: '2'
 name: together
 distribution_spec:
   description: Use Together.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::together
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda
@@ -1,6 +1,5 @@
 version: '2'
 image_name: together
-docker_image: null
 conda_env: together
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: together
@@ -70,8 +70,24 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
@@ -124,14 +140,17 @@ models:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
+- shield_id: meta-llama/Llama-Guard-3-8B
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
@@ -9,8 +9,12 @@ from pathlib import Path

 from llama_models.sku_list import all_registered_models

 from llama_stack.apis.models.models import ModelType
-from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.distribution.datatypes import (
+    ModelInput,
+    Provider,
+    ShieldInput,
+    ToolGroupInput,
+)
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "together"
     inference_provider = Provider(
@@ -59,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate:
         )
         for m in MODEL_ALIASES
     ]
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -83,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
                     "memory": [memory_provider],
                 },
                 default_models=default_models + [embedding_model],
+                default_tool_groups=default_tool_groups,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
             ),
         },
@@ -2,7 +2,6 @@ version: '2'
 name: vllm-gpu
 distribution_spec:
   description: Use a built-in vLLM engine for running LLM inference
-  docker_image: null
   providers:
     inference:
     - inline::vllm
@@ -25,4 +24,9 @@ distribution_spec:
     - inline::basic
     - inline::llm-as-judge
     - inline::braintrust
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::memory-runtime
 image_type: conda
@@ -1,6 +1,5 @@
 version: '2'
 image_name: vllm-gpu
-docker_image: null
 conda_env: vllm-gpu
 apis:
 - agents
@@ -11,6 +10,7 @@ apis:
 - safety
 - scoring
 - telemetry
+- tool_runtime
 providers:
   inference:
   - provider_id: vllm
@@ -73,24 +73,45 @@ providers:
     provider_type: inline::braintrust
     config:
       openai_api_key: ${env.OPENAI_API_KEY:}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: memory-runtime
+    provider_type: inline::memory-runtime
+    config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
   provider_id: vllm
-  provider_model_id: null
   model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
   provider_id: sentence-transformers
-  provider_model_id: null
   model_type: embedding
 shields: []
 memory_banks: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups:
+- toolgroup_id: builtin::websearch
+  provider_id: tavily-search
+- toolgroup_id: builtin::memory
+  provider_id: memory-runtime
+- toolgroup_id: builtin::code_interpreter
+  provider_id: code-interpreter
@@ -11,7 +11,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
 )
 from llama_stack.providers.inline.inference.vllm import VLLMConfig
 from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import (
+    DistributionTemplate,
+    RunConfigSettings,
+    ToolGroupInput,
+)


 def get_distribution_template() -> DistributionTemplate:
@@ -24,7 +28,14 @@ def get_distribution_template() -> DistributionTemplate:
         "eval": ["inline::meta-reference"],
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::memory-runtime",
+        ],
     }
     name = "vllm-gpu"
     inference_provider = Provider(
         provider_id="vllm",
@@ -54,6 +65,20 @@ def get_distribution_template() -> DistributionTemplate:
             "embedding_dimension": 384,
         },
     )
+    default_tool_groups = [
+        ToolGroupInput(
+            toolgroup_id="builtin::websearch",
+            provider_id="tavily-search",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::memory",
+            provider_id="memory-runtime",
+        ),
+        ToolGroupInput(
+            toolgroup_id="builtin::code_interpreter",
+            provider_id="code-interpreter",
+        ),
+    ]

     return DistributionTemplate(
         name=name,
@@ -70,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate:
                     "memory": [memory_provider],
                 },
                 default_models=[inference_model, embedding_model],
+                default_tool_groups=default_tool_groups,
             ),
         },
         run_config_env_vars={