Update doc templates for running safety on self-hosted templates (#874)

Commit 2cebb24d3a (parent eaba6a550a)
12 changed files with 140 additions and 37 deletions
Distribution dependencies (JSON) — a `sambanova` package list is added:

@@ -1,4 +1,34 @@
 {
+  "sambanova": [
+    "aiosqlite",
+    "blobfile",
+    "chardet",
+    "chromadb-client",
+    "faiss-cpu",
+    "fastapi",
+    "fire",
+    "httpx",
+    "matplotlib",
+    "nltk",
+    "numpy",
+    "openai",
+    "opentelemetry-exporter-otlp-proto-http",
+    "opentelemetry-sdk",
+    "pandas",
+    "pillow",
+    "psycopg2-binary",
+    "pypdf",
+    "redis",
+    "requests",
+    "scikit-learn",
+    "scipy",
+    "sentencepiece",
+    "tqdm",
+    "transformers",
+    "uvicorn",
+    "sentence-transformers --no-deps",
+    "torch --index-url https://download.pytorch.org/whl/cpu"
+  ],
   "hf-serverless": [
     "aiohttp",
     "aiosqlite",
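These are the pip packages a build of the new `sambanova` template pulls in. A minimal sketch of one way to produce such a build, assuming the standard `llama stack build` CLI with a conda image type (the exact invocation is not part of this diff):

```bash
# Sketch: build the sambanova distribution so the dependency list above is installed.
# Assumes the llama-stack CLI is installed and conda is available on PATH.
pip install llama-stack
llama stack build --template sambanova --image-type conda
```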
Ollama distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Ollama Distribution
 
 ```{toctree}

@@ -79,11 +82,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-ollama \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
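For context, the run-with-safety command above expects a few variables to already be exported; a sketch of a typical setup (the inference model tag is illustrative and not part of this change):

```bash
# Sketch: environment assumed by the run-with-safety command above.
export LLAMA_STACK_PORT=5001                              # any free port works
export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" # illustrative model choice
export SAFETY_MODEL="meta-llama/Llama-Guard-3-1B"         # shield model used in these templates
```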
Remote vLLM distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Remote vLLM Distribution
 ```{toctree}
 :maxdepth: 2

@@ -107,10 +110,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
SambaNova distribution docs:

@@ -16,9 +16,10 @@ The `llamastack/distribution-sambanova` distribution consists of the following p
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | inference | `remote::sambanova` |
-| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 | safety | `inline::llama-guard` |
 | telemetry | `inline::meta-reference` |
+| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime` |
+| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
 
 
 ### Environment Variables

@@ -32,13 +33,13 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct`
-- `meta-llama/Llama-3.2-1B-Instruct`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
+- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
+- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
+- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
+- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
+- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
 
 
 ### Prerequisite: API Keys
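The docs above call out an API-key prerequisite; a minimal sketch of starting this distribution once it is built (the `llama stack run` invocation, run-config path, and port are assumptions based on the standard CLI, not part of this diff):

```bash
# Sketch: run the SambaNova distribution after `llama stack build`.
export SAMBANOVA_API_KEY=<your-sambanova-key>   # placeholder, not a real key
llama stack run ./run.yaml --port 5001          # run-config path and port are illustrative
```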
TGI distribution docs:

@@ -1,3 +1,7 @@
+---
+orphan: true
+---
+
 # TGI Distribution
 
 ```{toctree}

@@ -98,10 +102,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-tgi \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
Together distribution docs:

@@ -1,3 +1,6 @@
+---
+orphan: true
+---
 # Together Distribution
 
 ```{toctree}
Ollama doc template (rendered with `{{ name }}`):

@@ -74,11 +74,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
Remote vLLM doc template (rendered with `{{ name }}`):

@@ -98,10 +98,15 @@ If you are using Llama Stack Safety / Shield APIs, use:
 export SAFETY_PORT=8081
 export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
SambaNova template build spec:

@@ -1,12 +1,10 @@
 version: '2'
-name: sambanova
 distribution_spec:
   description: Use SambaNova.AI for running LLM inference
-  docker_image: null
   providers:
     inference:
     - remote::sambanova
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector

@@ -16,4 +14,9 @@ distribution_spec:
     - inline::meta-reference
     telemetry:
     - inline::meta-reference
+    tool_runtime:
+    - remote::brave-search
+    - remote::tavily-search
+    - inline::code-interpreter
+    - inline::rag-runtime
 image_type: conda
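A build spec like the one above can also be consumed directly rather than by template name; a brief sketch, assuming the `--config` form of `llama stack build` and the conventional template path (both assumptions, not part of this diff):

```bash
# Sketch: build from an explicit build spec instead of a named template.
# The path mirrors the template layout referenced elsewhere in this change.
llama stack build --config llama_stack/templates/sambanova/build.yaml
```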
SambaNova template run config:

@@ -1,21 +1,20 @@
 version: '2'
 image_name: sambanova
-docker_image: null
-conda_env: sambanova
 apis:
 - agents
 - inference
-- memory
 - safety
 - telemetry
+- tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1/
+      url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY}
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:

@@ -23,6 +22,12 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/faiss_store.db
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config: {}
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -38,46 +43,63 @@ providers:
   telemetry:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
+    config:
+      service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
+      sinks: ${env.TELEMETRY_SINKS:console,sqlite}
+      sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/sambanova/trace_store.db}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:}
+      max_results: 3
+  - provider_id: code-interpreter
+    provider_type: inline::code-interpreter
+    config: {}
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
     config: {}
 metadata_store:
-  namespace: null
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-405B-Instruct
-  provider_id: null
+  model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
-  provider_id: null
   provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: null
   provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 shields:
-- params: null
-  shield_id: meta-llama/Llama-Guard-3-8B
-  provider_id: null
-  provider_shield_id: null
-memory_banks: []
+- shield_id: meta-llama/Llama-Guard-3-8B
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
+tool_groups: []
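The run config above relies on `${env.VAR:default}` substitutions; a short sketch of the overrides a deployment might export before launching (all values are illustrative, and in this file only `SAMBANOVA_API_KEY` has no default):

```bash
# Sketch: optional overrides for the ${env.VAR:default} substitutions in the run config.
export SAMBANOVA_API_KEY=<your-sambanova-key>             # required, no default in the config
export SQLITE_STORE_DIR=~/.llama/distributions/sambanova  # faiss/registry store location
export TELEMETRY_SINKS=console,sqlite                     # matches the default shown above
export TAVILY_SEARCH_API_KEY=<key>                        # only if the tavily-search tool is used
```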
SambaNova template generator (Python):

@@ -18,10 +18,16 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
 def get_distribution_template() -> DistributionTemplate:
     providers = {
         "inference": ["remote::sambanova"],
-        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
         "safety": ["inline::llama-guard"],
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
+        "tool_runtime": [
+            "remote::brave-search",
+            "remote::tavily-search",
+            "inline::code-interpreter",
+            "inline::rag-runtime",
+        ],
     }
 
     inference_provider = Provider(
TGI doc template (rendered with `{{ name }}`):

@@ -91,10 +91,15 @@ docker run \
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run-with-safety.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \