From 2cebb24d3a6c61ba9473435f4db85210ce7456eb Mon Sep 17 00:00:00 2001
From: Hardik Shah
Date: Fri, 24 Jan 2025 11:28:20 -0800
Subject: [PATCH] Update doc templates for running safety on self-hosted
 templates (#874)

---
 distributions/dependencies.json               | 30 ++++++++++
 .../self_hosted_distro/ollama.md              |  9 ++-
 .../self_hosted_distro/remote-vllm.md         | 10 +++-
 .../self_hosted_distro/sambanova.md           | 17 +++---
 .../distributions/self_hosted_distro/tgi.md   | 11 +++-
 .../self_hosted_distro/together.md            |  3 +
 llama_stack/templates/ollama/doc_template.md  |  6 +-
 .../templates/remote-vllm/doc_template.md     |  7 ++-
 llama_stack/templates/sambanova/build.yaml    |  9 ++-
 llama_stack/templates/sambanova/run.yaml      | 60 +++++++++++++------
 llama_stack/templates/sambanova/sambanova.py  |  8 ++-
 llama_stack/templates/tgi/doc_template.md     |  7 ++-
 12 files changed, 140 insertions(+), 37 deletions(-)

diff --git a/distributions/dependencies.json b/distributions/dependencies.json
index 7b5d8b002..2b2e35a50 100644
--- a/distributions/dependencies.json
+++ b/distributions/dependencies.json
@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 93f4adfb3..92e1f7dbf 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution
 
 ```{toctree}
@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 1638e9b11..b2d28be1b 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2
@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index 52d1cd962..199279990 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
 ### Environment Variables
@@ -32,13 +33,13 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
 
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 5a709d0a8..ba5dee77f 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution
 
 ```{toctree}
@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 707f5be7a..2d5c8fc77 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution
 
 ```{toctree}
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index a75583592..7c260e2c1 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -74,11 +74,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 7f48f961e..c855f6e62 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -98,10 +98,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/sambanova/build.yaml b/llama_stack/templates/sambanova/build.yaml
index d6da478d1..ca5ffe618 100644
--- a/llama_stack/templates/sambanova/build.yaml
+++ b/llama_stack/templates/sambanova/build.yaml
@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
    - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml
index 03c8ea44f..31f47e0c1 100644
--- a/llama_stack/templates/sambanova/run.yaml
+++ b/llama_stack/templates/sambanova/run.yaml
@@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -23,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
     config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 8c231617b..389e2a6c5 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -18,10 +18,16 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }
 
     inference_provider = Provider(
diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md
index 067f69d1f..18b7d6b86 100644
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@@ -91,10 +91,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
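
As a quick, hedged sanity check after applying this patch (not part of the PR itself): the updated `sambanova` template can be built and started with the Llama Stack CLI, and the shield declared in `run.yaml` should then be visible from a client. The exact flags and `llama-stack-client` subcommands may differ between releases, so treat the snippet below as a sketch; it assumes the `llama` and `llama-stack-client` CLIs are installed, and `SAMBANOVA_API_KEY` is picked up via the `${env.SAMBANOVA_API_KEY}` reference in `run.yaml`.

```bash
# Sketch only -- assumes the llama-stack CLI and llama-stack-client are installed
# and that the flags below exist in your installed release.
export SAMBANOVA_API_KEY=<your-key>
export LLAMA_STACK_PORT=5001

# Build the conda-based distribution described by llama_stack/templates/sambanova/build.yaml
llama stack build --template sambanova --image-type conda

# Start the server with the template's run.yaml
llama stack run ./llama_stack/templates/sambanova/run.yaml --port $LLAMA_STACK_PORT

# In another shell: confirm the registered models and the Llama-Guard shield
llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
llama-stack-client models list
llama-stack-client shields list
```

If `shields list` reports `meta-llama/Llama-Guard-3-8B`, the safety configuration from the updated `run.yaml` was registered as intended.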