all templates to include toolgroups and tool runtime

This commit is contained in:
Dinesh Yeduguru 2025-01-08 15:28:03 -08:00
parent 67b35613bb
commit edcfd66be3
55 changed files with 854 additions and 145 deletions

View file

@ -23,6 +23,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -54,6 +55,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -86,6 +88,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -116,6 +119,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -148,6 +152,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -181,6 +186,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -213,6 +219,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -247,6 +254,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
@ -286,6 +294,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
@ -319,6 +328,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -352,6 +362,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
@ -385,6 +396,7 @@
"psycopg2-binary",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",

View file

@ -19,6 +19,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
| safety | `remote::bedrock` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
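With these providers in place, the distribution registers the `builtin::websearch`, `builtin::memory`, and `builtin::code_interpreter` toolgroups by default. A minimal sketch of discovering them from a client, assuming the `llama-stack-client` Python package and a locally running distribution; the base URL and attribute names are assumptions, not confirmed by this commit:

```python
# Sketch only: assumes the llama-stack-client package and a running
# distribution; base_url, method, and attribute names are assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# The template defaults register three toolgroups at startup.
for tg in client.toolgroups.list():
    print(tg.identifier, "->", tg.provider_id)
```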

View file

@ -9,6 +9,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
| memory | `inline::meta-reference` |
| safety | `inline::llama-guard` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
### Environment Variables

View file

@ -22,6 +22,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
### Environment Variables

View file

@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
Note that you need access to NVIDIA GPUs to run this distribution. It is not compatible with CPU-only machines or machines with AMD GPUs.

View file

@ -22,6 +22,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
The only difference from the `meta-reference-gpu` distribution is that it supports more efficient inference via fp8, int4 quantization, etc.

View file

@ -22,6 +22,7 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
You should use this distribution if you have a regular desktop machine without very powerful GPUs. Of course, if you have powerful GPUs, you can still use this distribution since Ollama supports GPU acceleration.
### Environment Variables

View file

@ -18,6 +18,7 @@ The `llamastack/distribution-remote-vllm` distribution consists of the following
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
You can use this distribution if you have GPUs and want to run an independent vLLM server container for running inference.

View file

@ -23,6 +23,7 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
You can use this distribution if you have GPUs and want to run an independent TGI server container for running inference.

View file

@ -22,6 +22,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::memory-runtime` |
### Environment Variables

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Optional
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field
@ -18,3 +18,10 @@ class BraveSearchToolConfig(BaseModel):
default=3,
description="The maximum number of results to return",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
return {
"api_key": "${env.BRAVE_SEARCH_API_KEY:}",
"max_results": 3,
}
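The `${env.BRAVE_SEARCH_API_KEY:}` value uses the run-config environment-substitution syntax: the name before the `:` is the variable to read, and whatever follows it is the default (empty here). A rough, self-contained sketch of those semantics; illustrative only, not the resolver llama_stack actually ships:

```python
import os
import re

# Rough sketch of ${env.VAR:default} substitution as used in run configs;
# the real resolver lives in llama_stack and may differ.
_ENV_REF = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*):([^}]*)\}")

def resolve(value: str) -> str:
    # Replace each ${env.NAME:default} with the env value or the default.
    return _ENV_REF.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)

print(resolve("${env.BRAVE_SEARCH_API_KEY:}"))  # env value, or "" when unset
```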

View file

@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Optional
from typing import Any, Dict, Optional
from pydantic import BaseModel, Field
@ -18,3 +18,10 @@ class TavilySearchToolConfig(BaseModel):
default=3,
description="The maximum number of results to return",
)
@classmethod
def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]:
return {
"api_key": "${env.TAVILY_SEARCH_API_KEY:}",
"max_results": 3,
}

View file

@ -9,8 +9,7 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.apis.models import ModelInput
from llama_stack.distribution.datatypes import Provider
from llama_stack.distribution.datatypes import Provider, ToolGroupInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.bedrock.bedrock import MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -26,6 +25,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "bedrock"
memory_provider = Provider(
@ -46,6 +51,20 @@ def get_distribution_template() -> DistributionTemplate:
)
for m in MODEL_ALIASES
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -61,6 +80,7 @@ def get_distribution_template() -> DistributionTemplate:
"memory": [memory_provider],
},
default_models=default_models,
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={
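`ToolGroupInput`, used in `default_tool_groups` above, simply pairs a toolgroup identifier with the provider that serves it. A plausible shape for the datatype, inferred from its usage in this commit rather than read from `llama_stack.distribution.datatypes`:

```python
from pydantic import BaseModel

# Inferred from its usage in this commit; the real class in
# llama_stack.distribution.datatypes may carry additional fields.
class ToolGroupInput(BaseModel):
    toolgroup_id: str
    provider_id: str

ToolGroupInput(toolgroup_id="builtin::websearch", provider_id="tavily-search")
```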

View file

@ -2,7 +2,6 @@ version: '2'
name: bedrock
distribution_spec:
description: Use AWS Bedrock for running LLM inference and safety
docker_image: null
providers:
inference:
- remote::bedrock
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: bedrock
docker_image: null
conda_env: bedrock
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: bedrock
@ -65,8 +65,24 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
models:
@ -90,3 +106,10 @@ memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -2,7 +2,6 @@ version: '2'
name: cerebras
distribution_spec:
description: Use Cerebras for running LLM inference
docker_image: null
providers:
inference:
- remote::cerebras
@ -14,4 +13,9 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -9,8 +9,12 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -26,6 +30,12 @@ def get_distribution_template() -> DistributionTemplate:
"memory": ["inline::meta-reference"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
inference_provider = Provider(
@ -58,6 +68,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name="cerebras",
@ -74,6 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: cerebras
docker_image: null
conda_env: cerebras
apis:
- agents
@ -8,6 +7,7 @@ apis:
- memory
- safety
- telemetry
- tool_runtime
providers:
inference:
- provider_id: cerebras
@ -45,8 +45,24 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/cerebras/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
models:
@ -64,14 +80,17 @@ models:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
- shield_id: meta-llama/Llama-Guard-3-8B
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -2,7 +2,6 @@ version: '2'
name: fireworks
distribution_spec:
description: Use Fireworks.AI for running LLM inference
docker_image: null
providers:
inference:
- remote::fireworks
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -9,8 +9,12 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "fireworks"
@ -69,6 +79,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -86,6 +110,7 @@ def get_distribution_template() -> DistributionTemplate:
},
default_models=default_models + [embedding_model],
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: fireworks
docker_image: null
conda_env: fireworks
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: fireworks
@ -70,8 +70,24 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
models:
@ -129,14 +145,17 @@ models:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
- shield_id: meta-llama/Llama-Guard-3-8B
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -2,7 +2,6 @@ version: '2'
name: hf-endpoint
distribution_spec:
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
docker_image: null
providers:
inference:
- remote::hf::endpoint
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -5,7 +5,12 @@
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "hf-endpoint"
inference_provider = Provider(
@ -58,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -74,6 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
"memory": [memory_provider],
},
default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
),
"run-with-safety.yaml": RunConfigSettings(
provider_overrides={
@ -96,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: hf-endpoint
docker_image: null
conda_env: hf-endpoint
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: hf-endpoint
@ -75,33 +75,50 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: hf-endpoint
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: hf-endpoint-safety
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: hf-endpoint
docker_image: null
conda_env: hf-endpoint
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: hf-endpoint
@ -70,24 +70,45 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: hf-endpoint
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -2,7 +2,6 @@ version: '2'
name: hf-serverless
distribution_spec:
description: Use (an external) Hugging Face Inference Endpoint for running LLM inference
docker_image: null
providers:
inference:
- remote::hf::serverless
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -5,7 +5,12 @@
# the root directory of this source tree.
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -24,6 +29,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "hf-serverless"
@ -59,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -97,6 +122,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: hf-serverless
docker_image: null
conda_env: hf-serverless
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: hf-serverless
@ -75,33 +75,50 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: hf-serverless
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: hf-serverless-safety
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: hf-serverless
docker_image: null
conda_env: hf-serverless
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: hf-serverless
@ -70,24 +70,39 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: hf-serverless
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []

View file

@ -2,7 +2,6 @@ version: '2'
name: meta-reference-gpu
distribution_spec:
description: Use Meta Reference for running LLM inference
docker_image: null
providers:
inference:
- inline::meta-reference
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -7,8 +7,12 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceInferenceConfig,
)
@ -29,6 +33,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "meta-reference-gpu"
inference_provider = Provider(
@ -66,6 +76,20 @@ def get_distribution_template() -> DistributionTemplate:
model_id="${env.SAFETY_MODEL}",
provider_id="meta-reference-safety",
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -104,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: meta-reference-inference
@ -77,33 +77,50 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: meta-reference-inference
@ -71,24 +71,39 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []

View file

@ -2,7 +2,6 @@ version: '2'
name: meta-reference-quantized-gpu
distribution_spec:
description: Use Meta Reference with fp8, int4 quantization for running LLM inference
docker_image: null
providers:
inference:
- inline::meta-reference-quantized
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -7,8 +7,7 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceQuantizedInferenceConfig,
)
@ -29,7 +28,27 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
name = "meta-reference-quantized-gpu"
inference_provider = Provider(
provider_id="meta-reference-inference",
@ -76,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate:
"memory": [memory_provider],
},
default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: meta-reference-quantized-gpu
docker_image: null
conda_env: meta-reference-quantized-gpu
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: meta-reference-inference
@ -73,24 +73,45 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -2,7 +2,6 @@ version: '2'
name: ollama
distribution_spec:
description: Use (an external) Ollama server for running LLM inference
docker_image: null
providers:
inference:
- remote::ollama
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -7,8 +7,12 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "ollama"
inference_provider = Provider(
@ -61,6 +71,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -92,6 +116,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -1,6 +1,5 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: ollama
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: ollama
@ -69,33 +69,50 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: ollama
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: ollama
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: ollama
@ -69,24 +69,39 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []

View file

@ -2,7 +2,6 @@ version: '2'
name: remote-vllm
distribution_spec:
description: Use (an external) vLLM server for running LLM inference
docker_image: null
providers:
inference:
- remote::vllm
@ -16,4 +15,9 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
@ -8,6 +7,7 @@ apis:
- memory
- safety
- telemetry
- tool_runtime
providers:
inference:
- provider_id: vllm-inference
@ -52,33 +52,50 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: vllm-safety
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: remote-vllm
apis:
- agents
@ -8,6 +7,7 @@ apis:
- memory
- safety
- telemetry
- tool_runtime
providers:
inference:
- provider_id: vllm-inference
@ -46,24 +46,39 @@ providers:
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/remote-vllm/trace_store.db}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []

View file

@ -7,8 +7,12 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -24,6 +28,12 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "remote-vllm"
inference_provider = Provider(
@ -60,6 +70,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -97,6 +121,7 @@ def get_distribution_template() -> DistributionTemplate:
embedding_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -20,6 +20,7 @@ from llama_stack.distribution.datatypes import (
Provider,
ShieldInput,
StackRunConfig,
ToolGroupInput,
)
from llama_stack.distribution.distribution import get_provider_registry
from llama_stack.distribution.utils.dynamic import instantiate_class_type
@ -30,6 +31,7 @@ class RunConfigSettings(BaseModel):
provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
default_models: Optional[List[ModelInput]] = None
default_shields: Optional[List[ShieldInput]] = None
default_tool_groups: Optional[List[ToolGroupInput]] = None
def run_config(
self,
@ -91,6 +93,7 @@ class RunConfigSettings(BaseModel):
),
models=self.default_models or [],
shields=self.default_shields or [],
tool_groups=self.default_tool_groups or [],
)
@ -159,14 +162,22 @@ class DistributionTemplate(BaseModel):
build_config = self.build_config()
with open(yaml_output_dir / "build.yaml", "w") as f:
yaml.safe_dump(build_config.model_dump(), f, sort_keys=False)
yaml.safe_dump(
build_config.model_dump(exclude_none=True),
f,
sort_keys=False,
)
for yaml_pth, settings in self.run_configs.items():
run_config = settings.run_config(
self.name, self.providers, self.docker_image
)
with open(yaml_output_dir / yaml_pth, "w") as f:
yaml.safe_dump(run_config.model_dump(), f, sort_keys=False)
yaml.safe_dump(
run_config.model_dump(exclude_none=True),
f,
sort_keys=False,
)
if self.template_path:
docs = self.generate_markdown_docs()
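The switch to `model_dump(exclude_none=True)` is what drops fields such as `docker_image: null` and `provider_model_id: null` from the generated YAML elsewhere in this commit. A self-contained sketch of that behavior with a stand-in model (field names are illustrative):

```python
from typing import Optional

import yaml
from pydantic import BaseModel

# Stand-in for a run-config model; field names are illustrative only.
class RunConfig(BaseModel):
    image_name: str
    docker_image: Optional[str] = None

cfg = RunConfig(image_name="bedrock")

print(yaml.safe_dump(cfg.model_dump(), sort_keys=False))
# image_name: bedrock
# docker_image: null

print(yaml.safe_dump(cfg.model_dump(exclude_none=True), sort_keys=False))
# image_name: bedrock
```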

View file

@ -2,7 +2,6 @@ version: '2'
name: tgi
distribution_spec:
description: Use (an external) TGI server for running LLM inference
docker_image: null
providers:
inference:
- remote::tgi
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: tgi
docker_image: null
conda_env: tgi
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: tgi-inference
@ -70,27 +70,45 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi-safety
provider_model_id: null
model_type: llm
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
- shield_id: ${env.SAFETY_MODEL}
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -1,6 +1,5 @@
version: '2'
image_name: tgi
docker_image: null
conda_env: tgi
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: tgi-inference
@ -69,24 +69,39 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups: []

View file

@ -7,8 +7,12 @@
from pathlib import Path
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -27,6 +31,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "tgi"
inference_provider = Provider(
@ -63,6 +73,20 @@ def get_distribution_template() -> DistributionTemplate:
model_id="${env.SAFETY_MODEL}",
provider_id="tgi-safety",
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -99,6 +123,7 @@ def get_distribution_template() -> DistributionTemplate:
safety_model,
],
default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}")],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={

View file

@ -2,7 +2,6 @@ version: '2'
name: together
distribution_spec:
description: Use Together.AI for running LLM inference
docker_image: null
providers:
inference:
- remote::together
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: together
docker_image: null
conda_env: together
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: together
@ -70,8 +70,24 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models:
@ -124,14 +140,17 @@ models:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
- shield_id: meta-llama/Llama-Guard-3-8B
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -9,8 +9,12 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.apis.models.models import ModelType
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.distribution.datatypes import (
ModelInput,
Provider,
ShieldInput,
ToolGroupInput,
)
from llama_stack.providers.inline.inference.sentence_transformers import (
SentenceTransformersInferenceConfig,
)
@ -30,6 +34,12 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "together"
inference_provider = Provider(
@ -59,6 +69,20 @@ def get_distribution_template() -> DistributionTemplate:
)
for m in MODEL_ALIASES
]
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
embedding_model = ModelInput(
model_id="all-MiniLM-L6-v2",
provider_id="sentence-transformers",
@ -83,6 +107,7 @@ def get_distribution_template() -> DistributionTemplate:
"memory": [memory_provider],
},
default_models=default_models + [embedding_model],
default_tool_groups=default_tool_groups,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
),
},

View file

@ -2,7 +2,6 @@ version: '2'
name: vllm-gpu
distribution_spec:
description: Use a built-in vLLM engine for running LLM inference
docker_image: null
providers:
inference:
- inline::vllm
@ -25,4 +24,9 @@ distribution_spec:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
tool_runtime:
- remote::brave-search
- remote::tavily-search
- inline::code-interpreter
- inline::memory-runtime
image_type: conda

View file

@ -1,6 +1,5 @@
version: '2'
image_name: vllm-gpu
docker_image: null
conda_env: vllm-gpu
apis:
- agents
@ -11,6 +10,7 @@ apis:
- safety
- scoring
- telemetry
- tool_runtime
providers:
inference:
- provider_id: vllm
@ -73,24 +73,45 @@ providers:
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:}
max_results: 3
- provider_id: code-interpreter
provider_type: inline::code-interpreter
config: {}
- provider_id: memory-runtime
provider_type: inline::memory-runtime
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm
provider_model_id: null
model_type: llm
- metadata:
embedding_dimension: 384
model_id: all-MiniLM-L6-v2
provider_id: sentence-transformers
provider_model_id: null
model_type: embedding
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::memory
provider_id: memory-runtime
- toolgroup_id: builtin::code_interpreter
provider_id: code-interpreter

View file

@ -11,7 +11,11 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
)
from llama_stack.providers.inline.inference.vllm import VLLMConfig
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
from llama_stack.templates.template import (
DistributionTemplate,
RunConfigSettings,
ToolGroupInput,
)
def get_distribution_template() -> DistributionTemplate:
@ -24,7 +28,14 @@ def get_distribution_template() -> DistributionTemplate:
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
"tool_runtime": [
"remote::brave-search",
"remote::tavily-search",
"inline::code-interpreter",
"inline::memory-runtime",
],
}
name = "vllm-gpu"
inference_provider = Provider(
provider_id="vllm",
@ -54,6 +65,20 @@ def get_distribution_template() -> DistributionTemplate:
"embedding_dimension": 384,
},
)
default_tool_groups = [
ToolGroupInput(
toolgroup_id="builtin::websearch",
provider_id="tavily-search",
),
ToolGroupInput(
toolgroup_id="builtin::memory",
provider_id="memory-runtime",
),
ToolGroupInput(
toolgroup_id="builtin::code_interpreter",
provider_id="code-interpreter",
),
]
return DistributionTemplate(
name=name,
@ -70,6 +95,7 @@ def get_distribution_template() -> DistributionTemplate:
"memory": [memory_provider],
},
default_models=[inference_model, embedding_model],
default_tool_groups=default_tool_groups,
),
},
run_config_env_vars={