Move run-*.yaml to templates/ so they can be packaged

This commit is contained in:
Ashwin Bharambe 2024-11-18 14:54:20 -08:00
parent dd732f037f
commit 5dce17668c
23 changed files with 682 additions and 722 deletions

View file

@ -12,6 +12,11 @@ We actively welcome your pull requests.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
### Updating Provider Configurations
If you change a provider's configuration in any way (for example, by introducing a new config key or changing the list of models), run `python llama_stack/scripts/distro_codegen.py` to regenerate the affected YAML files and the documentation. Do not edit the files under `docs/source/.../distributions/` by hand; they are auto-generated and your changes will be overwritten.
### Building the Documentation
If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.

View file

@ -1,91 +0,0 @@
version: '2'
image_name: fireworks
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference
api_key: ${env.FIREWORKS_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
models:
- metadata: {}
model_id: fireworks/llama-v3p1-8b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-70b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-405b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-1b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-3b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-11b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-90b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-8b
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-11b-vision
provider_id: null
provider_model_id: null
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/fireworks/run.yaml

View file

@ -1,70 +0,0 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
- provider_id: meta-reference-safety
provider_type: inline::meta-reference
config:
model: ${env.SAFETY_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/meta-reference-gpu/run-with-safety.yaml

View file

@ -1,56 +0,0 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/meta-reference-gpu/run.yaml

View file

@ -1,62 +0,0 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: ollama
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/ollama/run-with-safety.yaml

View file

@ -1,54 +0,0 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/ollama/run.yaml

View file

@ -1,70 +0,0 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
- provider_id: vllm-safety
provider_type: remote::vllm
config:
url: ${env.SAFETY_VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: vllm-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/remote-vllm/run-with-safety.yaml

View file

@ -1,56 +0,0 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/remote-vllm/run.yaml

View file

@ -1,66 +0,0 @@
version: '2'
image_name: tgi
docker_image: llamastack/distribution-tgi:test-0.0.52rc3
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: tgi-inference
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
- provider_id: tgi-safety
provider_type: remote::tgi
config:
url: ${env.TGI_SAFETY_URL}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/tgi/run-with-safety.yaml

View file

@ -1,54 +0,0 @@
version: '2'
image_name: tgi
docker_image: llamastack/distribution-tgi:test-0.0.52rc3
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: tgi-inference
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

1
distributions/tgi/run.yaml Symbolic link
View file

@ -0,0 +1 @@
../../llama_stack/templates/tgi/run.yaml

View file

@ -1,87 +0,0 @@
version: '2'
image_name: together
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: null
provider_model_id: null
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-1B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1 @@
../../llama_stack/templates/together/run.yaml

View file

@ -1,55 +0,0 @@
services:
${SERVICE_NAME:-ollama}:
image: ollama/ollama:latest
ports:
- ${OLLAMA_PORT:-11434}:${OLLAMA_PORT:-11434}
volumes:
- $HOME/.ollama:/root/.ollama
devices:
- nvidia.com/gpu=all
runtime: nvidia
healthcheck:
test: ["CMD", "curl", "-f", "http://ollama:11434"]
interval: 10s
timeout: 5s
retries: 5
${SERVICE_NAME:-ollama}-init:
image: ollama/ollama
depends_on:
- ${SERVICE_NAME:-ollama}:
condition: service_healthy
environment:
- OLLAMA_HOST=ollama
- OLLAMA_MODELS=${OLLAMA_MODELS}
volumes:
- $HOME/.ollama:/root/.ollama
entrypoint: >
sh -c '
max_attempts=30;
attempt=0;
echo "Waiting for Ollama server...";
until curl -s http://ollama:11434 > /dev/null; do
attempt=$((attempt + 1));
if [ $attempt -ge $max_attempts ]; then
echo "Timeout waiting for Ollama server";
exit 1;
fi;
echo "Attempt $attempt: Server not ready yet...";
sleep 5;
done;
echo "Server ready. Pulling models...";
models="${OLLAMA_MODELS}";
for model in $models; do
echo "Pulling $model...";
if ! ollama pull "$model"; then
echo "Failed to pull $model";
exit 1;
fi;
done;
echo "All models pulled successfully"
'

View file

@ -40,7 +40,7 @@ def process_template(template_dir: Path, progress) -> None:
template = template_func() template = template_func()
template.save_distribution( template.save_distribution(
yaml_output_dir=REPO_ROOT / "distributions" / template.name, yaml_output_dir=REPO_ROOT / "llama_stack" / "templates" / template.name,
doc_output_dir=REPO_ROOT doc_output_dir=REPO_ROOT
/ "docs/source/getting_started/distributions" / "docs/source/getting_started/distributions"
/ f"{template.distro_type}_distro", / f"{template.distro_type}_distro",

View file

@ -0,0 +1,91 @@
version: '2'
image_name: fireworks
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference
api_key: ${env.FIREWORKS_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
models:
- metadata: {}
model_id: fireworks/llama-v3p1-8b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-70b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p1-405b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-1b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-3b-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-11b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-v3p2-90b-vision-instruct
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-8b
provider_id: null
provider_model_id: null
- metadata: {}
model_id: fireworks/llama-guard-3-11b-vision
provider_id: null
provider_model_id: null
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-8B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,70 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
- provider_id: meta-reference-safety
provider_type: inline::meta-reference
config:
model: ${env.SAFETY_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:null}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,56 @@
version: '2'
image_name: meta-reference-gpu
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
max_seq_len: 4096
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:null}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,62 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: ollama
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,54 @@
version: '2'
image_name: ollama
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: ollama
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:http://localhost:11434}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: ollama
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,70 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
- provider_id: vllm-safety
provider_type: remote::vllm
config:
url: ${env.SAFETY_VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: vllm-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,56 @@
version: '2'
image_name: remote-vllm
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL}
max_tokens: ${env.VLLM_MAX_TOKENS:4096}
api_token: ${env.VLLM_API_TOKEN:fake}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,66 @@
version: '2'
image_name: tgi
docker_image: llamastack/distribution-tgi:test-0.0.52rc3
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: tgi-inference
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
- provider_id: tgi-safety
provider_type: remote::tgi
config:
url: ${env.TGI_SAFETY_URL}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi-safety
provider_model_id: null
shields:
- params: null
shield_id: ${env.SAFETY_MODEL}
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,54 @@
version: '2'
image_name: tgi
docker_image: llamastack/distribution-tgi:test-0.0.52rc3
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: tgi-inference
provider_type: remote::tgi
config:
url: ${env.TGI_URL}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/registry.db
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi-inference
provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []

View file

@ -0,0 +1,87 @@
version: '2'
image_name: together
docker_image: null
conda_env: null
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
inference:
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY}
memory:
- provider_id: faiss
provider_type: inline::faiss
config:
kvstore:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/faiss_store.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config: {}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sqlite
namespace: null
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/agents_store.db
telemetry:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
metadata_store:
namespace: null
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
models:
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Meta-Llama-Guard-3-8B
provider_id: null
provider_model_id: null
- metadata: {}
model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
provider_id: null
provider_model_id: null
shields:
- params: null
shield_id: meta-llama/Llama-Guard-3-1B
provider_id: null
provider_shield_id: null
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []