Update doc templates for running safety on self-hosted templates (#874)

Commit 2cebb24d3a (parent eaba6a550a)
12 changed files with 140 additions and 37 deletions
Distribution dependencies (JSON) — a `sambanova` package list is added:

@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
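These are the pip packages a build of the new `sambanova` template pulls in. A minimal sketch of one way to produce such a build, assuming the standard `llama stack build` CLI with a conda image type (the exact invocation is not part of this diff):

```bash
# Sketch: build the sambanova distribution so the dependency list above is installed.
# Assumes the llama-stack CLI is installed and conda is available on PATH.
pip install llama-stack
llama stack build --template sambanova --image-type conda
```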
Ollama distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution
 
 ```{toctree}

@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
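For context, the run-with-safety command above expects a few variables to already be exported; a sketch of a typical setup (the inference model tag is illustrative and not part of this change):

```bash
# Sketch: environment assumed by the run-with-safety command above.
export LLAMA_STACK_PORT=5001                              # any free port works
export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" # illustrative model choice
export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"         # shield model used in these templates
```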
Remote vLLM distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2

@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
SambaNova distribution docs:

@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
 ### Environment Variables

@@ -32,13 +33,13 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
 
 
 ### Prerequisite: API Keys
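The docs above call out an API-key prerequisite; a minimal sketch of starting this distribution once it is built (the `llama stack run` invocation, run-config path, and port are assumptions based on the standard CLI, not part of this diff):

```bash
# Sketch: run the SambaNova distribution after `llama stack build`.
export SAMBANOVA_API_KEY=<your-sambanova-key>   # placeholder, not a real key
llama stack run ./run.yaml --port 5001          # run-config path and port are illustrative
```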
TGI distribution docs:

@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution
 
 ```{toctree}

@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
Together distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution
 
 ```{toctree}
Ollama doc template (rendered with `{{ name }}`):

@@ -74,11 +74,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
Remote vLLM doc template (rendered with `{{ name }}`):

@@ -98,10 +98,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
SambaNova template build spec:

@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector

@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
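A build spec like the one above can also be consumed directly rather than by template name; a brief sketch, assuming the `--config` form of `llama stack build` and the conventional template path (both assumptions, not part of this diff):

```bash
# Sketch: build from an explicit build spec instead of a named template.
# The path mirrors the template layout referenced elsewhere in this change.
llama stack build --config llama_stack/templates/sambanova/build.yaml
```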
SambaNova template run config:

@@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:

@@ -23,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
     config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
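The run config above relies on `${env.VAR:default}` substitutions; a short sketch of the overrides a deployment might export before launching (all values are illustrative, and in this file only `SAMBANOVA_API_KEY` has no default):

```bash
# Sketch: optional overrides for the ${env.VAR:default} substitutions in the run config.
export SAMBANOVA_API_KEY=<your-sambanova-key>             # required, no default in the config
export SQLITE_STORE_DIR=~/.llama/distributions/sambanova  # faiss/registry store location
export TELEMETRY_SINKS=console,sqlite                     # matches the default shown above
export TAVILY_SEARCH_API_KEY=<key>                        # only if the tavily-search tool is used
```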
SambaNova template generator (Python):

@@ -18,10 +18,16 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }
 
     inference_provider = Provider(
TGI doc template (rendered with `{{ name }}`):

@@ -91,10 +91,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \