From 2cebb24d3a6c61ba9473435f4db85210ce7456eb Mon Sep 17 00:00:00 2001
From: Hardik Shah
Date: Fri, 24 Jan 2025 11:28:20 -0800
Subject: [PATCH] Update doc templates for running safety on self-hosted
 templates (#874)

---
 distributions/dependencies.json               | 30 ++++++++++
 .../self_hosted_distro/ollama.md              |  9 ++-
 .../self_hosted_distro/remote-vllm.md         | 10 +++-
 .../self_hosted_distro/sambanova.md           | 17 +++---
 .../distributions/self_hosted_distro/tgi.md   | 11 +++-
 .../self_hosted_distro/together.md            |  3 +
 llama_stack/templates/ollama/doc_template.md  |  6 +-
 .../templates/remote-vllm/doc_template.md     |  7 ++-
 llama_stack/templates/sambanova/build.yaml    |  9 ++-
 llama_stack/templates/sambanova/run.yaml      | 60 +++++++++++++------
 llama_stack/templates/sambanova/sambanova.py  |  8 ++-
 llama_stack/templates/tgi/doc_template.md     |  7 ++-
 12 files changed, 140 insertions(+), 37 deletions(-)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 7b5d8b002..2b2e35a50 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 93f4adfb3..92e1f7dbf 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution
 
 ```{toctree}
@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 1638e9b11..b2d28be1b 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index 52d1cd962..199279990 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
 ### Environment Variables
@@ -32,13 +33,13 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
 
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 5a709d0a8..ba5dee77f 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution
 
 ```{toctree}
@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 707f5be7a..2d5c8fc77 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution
 
 ```{toctree}
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index a75583592..7c260e2c1 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -74,11 +74,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 7f48f961e..c855f6e62 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -98,10 +98,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml
index d6da478d1..ca5ffe618 100644
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
    - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 03c8ea44f..31f47e0c1 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -23,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
     config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 8c231617b..389e2a6c5 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -18,10 +18,16 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }
 
     inference_provider = Provider(
diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md
index 067f69d1f..18b7d6b86 100644
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@@ -91,10 +91,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
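
As a quick, hedged sanity check after applying this patch (not part of the PR itself): the updated `sambanova` template can be built and started with the Llama Stack CLI, and the shield declared in `run.yaml` should then be visible from a client. The exact flags and `llama-stack-client` subcommands may differ between releases, so treat the snippet below as a sketch; it assumes the `llama` and `llama-stack-client` CLIs are installed, and `SAMBANOVA_API_KEY` is picked up via the `${env.SAMBANOVA_API_KEY}` reference in `run.yaml`.

```bash
# Sketch only -- assumes the llama-stack CLI and llama-stack-client are installed
# and that the flags below exist in your installed release.
export SAMBANOVA_API_KEY=<your-key>
export LLAMA_STACK_PORT=5001

# Build the conda-based distribution described by llama_stack/templates/sambanova/build.yaml
llama stack build --template sambanova --image-type conda

# Start the server with the template's run.yaml
llama stack run ./llama_stack/templates/sambanova/run.yaml --port $LLAMA_STACK_PORT

# In another shell: confirm the registered models and the Llama-Guard shield
llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
llama-stack-client models list
llama-stack-client shields list
```

If `shields list` reports `meta-llama/Llama-Guard-3-8B`, the safety configuration from the updated `run.yaml` was registered as intended.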