diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index f352f737e..3ebcbafff 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -9,7 +9,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::nvidia` | -| safety | `inline::llama-guard` | +| safety | `remote::fiddlecube` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index a0c9eb263..05a9a6953 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -9,7 +9,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `remote::cerebras` | -| safety | `inline::llama-guard` | +| safety | `remote::fiddlecube` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 9aeb7a88b..87a2c068f 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -19,7 +19,7 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists | datasetio | `remote::huggingface`, `inline::localfs` | | eval | `inline::meta-reference` | | inference | `inline::meta-reference-quantized` | -| safety | `inline::llama-guard` | +| safety | `remote::fiddlecube` | | scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` | diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index e6ac616be..02d89dfe3 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -17,7 +17,7 @@ The `llamastack/distribution-sambanova` distribution consists of the following p |-----|-------------| | agents | `inline::meta-reference` | | inference | `remote::sambanova` | -| safety | `inline::llama-guard` | +| safety | `remote::fiddlecube` | | telemetry | `inline::meta-reference` | | tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` | | vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | diff --git a/llama_stack/templates/cerebras/build.yaml b/llama_stack/templates/cerebras/build.yaml index 9d5ab1a52..266b2d4b4 100644 --- a/llama_stack/templates/cerebras/build.yaml +++ b/llama_stack/templates/cerebras/build.yaml @@ -5,7 +5,7 @@ distribution_spec: inference: - remote::cerebras safety: - - inline::llama-guard + - remote::fiddlecube vector_io: - inline::faiss - remote::chromadb diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 2dfae04f8..da0b79286 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -22,7 +22,7 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::cerebras"], - "safety": ["inline::llama-guard"], + "safety": ["remote::fiddlecube"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], "agents": ["inline::meta-reference"], "eval": ["inline::meta-reference"], diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 05d3f4525..d0873357d 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -21,8 +21,8 @@ providers: provider_type: inline::sentence-transformers config: {} safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: fiddlecube + provider_type: remote::fiddlecube config: {} vector_io: - provider_id: faiss diff --git a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml index 7bbcfe5f2..911f0f731 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/build.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/build.yaml @@ -9,7 +9,7 @@ distribution_spec: - remote::chromadb - remote::pgvector safety: - - inline::llama-guard + - remote::fiddlecube agents: - inline::meta-reference telemetry: diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index 8c2a6ec9f..fac46489a 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -22,7 +22,7 @@ def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["inline::meta-reference-quantized"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], + "safety": ["remote::fiddlecube"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], "eval": ["inline::meta-reference"], diff --git a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml index d43cf3917..e1152147a 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-quantized-gpu/run.yaml @@ -32,8 +32,8 @@ providers: namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: fiddlecube + provider_type: remote::fiddlecube config: {} agents: - provider_id: meta-reference diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml index e9748721a..a05242f03 100644 --- a/llama_stack/templates/nvidia/build.yaml +++ b/llama_stack/templates/nvidia/build.yaml @@ -7,7 +7,7 @@ distribution_spec: vector_io: - inline::faiss safety: - - inline::llama-guard + - remote::fiddlecube agents: - inline::meta-reference telemetry: diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index d24c9ed48..eaf9ac500 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -18,7 +18,7 @@ def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::nvidia"], "vector_io": ["inline::faiss"], - "safety": ["inline::llama-guard"], + "safety": ["remote::fiddlecube"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], "eval": ["inline::meta-reference"], diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index c8ae362f5..9bc572980 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -26,8 +26,8 @@ providers: namespace: null db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: fiddlecube + provider_type: remote::fiddlecube config: {} agents: - provider_id: meta-reference diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml index ca5ffe618..8a4e9c58b 100644 --- a/llama_stack/templates/sambanova/build.yaml +++ b/llama_stack/templates/sambanova/build.yaml @@ -9,7 +9,7 @@ distribution_spec: - remote::chromadb - remote::pgvector safety: - - inline::llama-guard + - remote::fiddlecube agents: - inline::meta-reference telemetry: diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 39b0f3c4e..56e122711 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -29,8 +29,8 @@ providers: provider_type: remote::pgvector config: {} safety: - - provider_id: llama-guard - provider_type: inline::llama-guard + - provider_id: fiddlecube + provider_type: remote::fiddlecube config: {} agents: - provider_id: meta-reference diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 70b54b010..9402624e4 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -24,7 +24,7 @@ def get_distribution_template() -> DistributionTemplate: providers = { "inference": ["remote::sambanova"], "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"], - "safety": ["inline::llama-guard"], + "safety": ["remote::fiddlecube"], "agents": ["inline::meta-reference"], "telemetry": ["inline::meta-reference"], "tool_runtime": [