Update doc templates for running safety on self-hosted templates (#874)

2025-06-28 02:53:30 +00:00 · 2025-01-24 11:28:20 -08:00 · 2025-01-24 11:28:20 -08:00 · 2cebb24d3a
commit 2cebb24d3a
parent eaba6a550a
12 changed files with 140 additions and 37 deletions
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
  "hf-serverless": [
    "aiohttp",
    "aiosqlite",
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution

 ```{toctree}
@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-ollama \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-remote-vllm \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |


 ### Environment Variables
@ -32,13 +33,13 @@ The following environment variables can be configured:

 The following models are available by default:

- `meta-llama/Llama-3.1-8B-Instruct`
- `meta-llama/Llama-3.1-70B-Instruct`
- `meta-llama/Llama-3.1-405B-Instruct`
- `meta-llama/Llama-3.2-1B-Instruct`
- `meta-llama/Llama-3.2-3B-Instruct`
- `meta-llama/Llama-3.2-11B-Vision-Instruct`
- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`


 ### Prerequisite: API Keys
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution

 ```{toctree}
@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-tgi \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution

 ```{toctree}
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@ -74,11 +74,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@ -98,10 +98,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
  description: Use SambaNova.AI for running LLM inference
-  docker_image: null
  providers:
    inference:
    - remote::sambanova
-    memory:
+    vector_io:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector
@ -16,4 +14,9 @@ distribution_spec:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
  inference:
  - provider_id: sambanova
    provider_type: remote::sambanova
    config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
@ -23,6 +22,12 @@ providers:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
@ -38,46 +43,63 @@ providers:
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
    config: {}
 metadata_store:
-  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
  model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
  provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@ -18,10 +18,16 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
    }

    inference_provider = Provider(
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@ -91,10 +91,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:

 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \