Add eval/scoring/datasetio API providers to distribution templates & UI developer guide (#564)

# What does this PR do?

- add /eval, /scoring, /datasetio API providers to the distribution
templates
- regenerate the build.yaml / run.yaml files
- fix `template.py` to take a list of providers per API instead of only the
first one
- override the memory provider to default to faiss for all distros, since only
one memory provider is needed to start the basic flow and chromadb/pgvector
require an additional setup step (see the sketch after this list).
```
python llama_stack/scripts/distro_codegen.py
```

- update the UI README to start the UI from a conda build.
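
For the memory provider override mentioned above, here is a minimal sketch, abridged from the bedrock template in this diff (the other templates follow the same pattern):
```
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import RunConfigSettings

name = "bedrock"

# Pin faiss as the only memory provider wired into the generated run.yaml;
# chromadb/pgvector stay in build.yaml but are not started by default since
# they need an additional setup step.
memory_provider = Provider(
    provider_id="faiss",
    provider_type="inline::faiss",
    config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)

# Passed as run_configs= to DistributionTemplate(...) in the template module.
run_configs = {
    "run.yaml": RunConfigSettings(
        provider_overrides={
            "memory": [memory_provider],
        },
    ),
}
```
This pairs with the `template.py` fix, which now expands every provider type listed for an API into the run config instead of only the first one (see the `template.py` hunk further down in the diff).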

## Test Plan

```
python llama_stack/scripts/distro_codegen.py
```

- Use the newly generated `run.yaml` to start the server
```
llama stack run ./llama_stack/templates/together/run.yaml
```
<img width="1191" alt="image"
src="https://github.com/user-attachments/assets/62f7d179-0cd0-427c-b6e8-e087d4648f09">


#### Registration
```
❯ llama-stack-client datasets register \
--dataset-id "mmlu" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name':    │ dataset │
│            │             │ 'evals__mmlu__details', 'split':        │         │
│            │             │ 'train'}                                │         │
└────────────┴─────────────┴─────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client datasets register \
--dataset-id "simpleqa" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__simpleqa", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
❯ llama-stack-client datasets list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
┃ identifier ┃ provider_id ┃ metadata                                                      ┃ type    ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
│ mmlu       │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__mmlu__details',  │ dataset │
│            │             │ 'split': 'train'}                                             │         │
│ simpleqa   │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__simpleqa',       │ dataset │
│            │             │ 'split': 'train'}                                             │         │
└────────────┴─────────────┴───────────────────────────────────────────────────────────────┴─────────┘
```

```
❯ llama-stack-client eval_tasks register \
> --eval-task-id meta-reference-mmlu \
> --provider-id meta-reference \
> --dataset-id mmlu \
> --scoring-functions basic::regex_parser_multiple_choice_answer
❯ llama-stack-client eval_tasks register \
--eval-task-id meta-reference-simpleqa \
--provider-id meta-reference \
--dataset-id simpleqa \
--scoring-functions llm-as-judge::405b-simpleqa
❯ llama-stack-client eval_tasks list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃ dataset_id ┃ identifier       ┃ metadata ┃ provider_id    ┃ provider_resour… ┃ scoring_functio… ┃ type      ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ mmlu       │ meta-reference-… │ {}       │ meta-reference │ meta-reference-… │ ['basic::regex_… │ eval_task │
│ simpleqa   │ meta-reference-… │ {}       │ meta-reference │ meta-reference-… │ ['llm-as-judge:… │ eval_task │
└────────────┴──────────────────┴──────────┴────────────────┴──────────────────┴──────────────────┴───────────┘
```

#### Test with UI
```
streamlit run app.py
```
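
For reference, the full developer flow from the updated UI README, assuming a conda-based build (the README uses the `together` template as its example):
```
# build and start a distribution with conda
llama stack build --template together --image-type conda
llama stack run together

# install UI requirements and launch Streamlit
cd llama_stack/distribution/ui
pip install -r requirements.txt
streamlit run app.py
```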

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
Commit 7301403ce3 (parent a4daf4d3ec), authored by Xi Yan on 2024-12-05 16:29:32 -08:00, committed by GitHub.
47 changed files with 841 additions and 195 deletions

View file

@ -1,10 +1,12 @@
{
"tgi": [
"hf-serverless": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -13,6 +15,7 @@
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -27,6 +30,66 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"remote-vllm": [
"aiosqlite",
"blobfile",
@ -54,18 +117,22 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"vllm-gpu": [
"fireworks": [
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -77,82 +144,17 @@
"tqdm",
"transformers",
"uvicorn",
"vllm",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-quantized-gpu": [
"accelerate",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu",
"fire",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchao==0.5.0",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"fairscale",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-serverless": [
"tgi": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -161,61 +163,7 @@
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"together": [
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"together",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
"blobfile",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"ollama",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -232,10 +180,12 @@
],
"bedrock": [
"aiosqlite",
"autoevals",
"blobfile",
"boto3",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
@ -243,6 +193,7 @@
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -257,20 +208,24 @@
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-endpoint": [
"aiohttp",
"meta-reference-gpu": [
"accelerate",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"fairscale",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -279,25 +234,34 @@
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"fireworks": [
"meta-reference-quantized-gpu": [
"accelerate",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu",
"fire",
"fireworks-ai",
"httpx",
"lm-format-enforcer",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
@ -306,9 +270,13 @@
"scikit-learn",
"scipy",
"sentencepiece",
"torch",
"torchao==0.5.0",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
"zmq",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
@ -337,5 +305,67 @@
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"ollama": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"ollama",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
],
"hf-endpoint": [
"aiohttp",
"aiosqlite",
"autoevals",
"blobfile",
"chardet",
"chromadb-client",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"matplotlib",
"nltk",
"numpy",
"openai",
"pandas",
"pillow",
"psycopg2-binary",
"pypdf",
"redis",
"scikit-learn",
"scipy",
"sentencepiece",
"tqdm",
"transformers",
"uvicorn",
"sentence-transformers --no-deps",
"torch --index-url https://download.pytorch.org/whl/cpu"
]
}

View file

@ -1,6 +1,3 @@
---
orphan: true
---
# Bedrock Distribution
```{toctree}
@ -15,9 +12,12 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::bedrock` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `remote::bedrock` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::fireworks` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `inline::meta-reference` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `inline::meta-reference-quantized` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::ollama` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |
@ -119,7 +122,7 @@ llama stack run ./run-with-safety.yaml \
### (Optional) Update Model Serving Configuration
```{note}
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) variable for supported Ollama models.
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
```
To serve a new model with `ollama`

View file

@ -16,9 +16,12 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::tgi` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -15,9 +15,12 @@ The `llamastack/distribution-together` distribution consists of the following pr
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::together` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| telemetry | `inline::meta-reference` |

View file

@ -1,16 +1,41 @@
# LLama Stack UI
# (Experimental) LLama Stack UI
[!NOTE] This is a work in progress.
## Docker Setup
## Prerequisite
- Start up Llama Stack Server
```
llama stack run
```
:warning: This is a work in progress.
## Running Streamlit App
## Developer Setup
1. Start up Llama Stack API server. More details [here](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).
```
llama stack build --template together --image-type conda
llama stack run together
```
2. (Optional) Register datasets and eval tasks as resources. If you want to run pre-configured evaluation flows (e.g. Evaluations (Generation + Scoring) Page).
```bash
$ llama-stack-client datasets register \
--dataset-id "mmlu" \
--provider-id "huggingface" \
--url "https://huggingface.co/datasets/llamastack/evals" \
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
```
```bash
$ llama-stack-client eval_tasks register \
--eval-task-id meta-reference-mmlu \
--provider-id meta-reference \
--dataset-id mmlu \
--scoring-functions basic::regex_parser_multiple_choice_answer
```
3. Start Streamlit UI
```bash
cd llama_stack/distribution/ui
pip install -r requirements.txt
streamlit run app.py

View file

@ -6,6 +6,9 @@
from pathlib import Path
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,10 +19,19 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["remote::bedrock"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "bedrock"
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
return DistributionTemplate(
name="bedrock",
name=name,
distro_type="self_hosted",
description="Use AWS Bedrock for running LLM inference and safety",
docker_image=None,
@ -27,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
providers=providers,
default_models=[],
run_configs={
"run.yaml": RunConfigSettings(),
"run.yaml": RunConfigSettings(
provider_overrides={
"memory": [memory_provider],
},
),
},
run_config_env_vars={
"LLAMASTACK_PORT": (

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: bedrock
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -37,6 +40,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -9,6 +9,7 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
@ -22,13 +23,23 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "fireworks"
inference_provider = Provider(
provider_id="fireworks",
provider_type="remote::fireworks",
config=FireworksImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
core_model_to_hf_repo = {
m.descriptor(): m.huggingface_repo for m in all_registered_models()
@ -42,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
]
return DistributionTemplate(
name="fireworks",
name=name,
distro_type="self_hosted",
description="Use Fireworks.AI for running LLM inference",
docker_image=None,
@ -53,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=default_models,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: fireworks
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "hf-endpoint"
inference_provider = Provider(
provider_id="hf-endpoint",
provider_type="remote::hf::endpoint",
config=InferenceEndpointImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -34,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="hf-endpoint",
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
@ -45,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -59,7 +69,8 @@ def get_distribution_template() -> DistributionTemplate:
endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
),
),
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-endpoint
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -44,6 +47,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-endpoint
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -5,6 +5,7 @@
# the root directory of this source tree.
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,22 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "hf-serverless"
inference_provider = Provider(
provider_id="hf-serverless",
provider_type="remote::hf::serverless",
config=InferenceAPIImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -34,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="hf-serverless",
name=name,
distro_type="self_hosted",
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
docker_image=None,
@ -45,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -59,7 +70,8 @@ def get_distribution_template() -> DistributionTemplate:
repo="${env.SAFETY_MODEL}",
),
),
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-serverless
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -44,6 +47,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: hf-serverless
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceInferenceConfig,
)
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "meta-reference-gpu"
inference_provider = Provider(
provider_id="meta-reference-inference",
provider_type="inline::meta-reference",
@ -30,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -41,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="meta-reference-gpu",
name=name,
distro_type="self_hosted",
description="Use Meta Reference for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -51,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -67,6 +77,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -46,6 +49,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -40,6 +43,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.meta_reference import (
MetaReferenceQuantizedInferenceConfig,
)
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "meta-reference-quantized-gpu"
inference_provider = Provider(
provider_id="meta-reference-inference",
provider_type="inline::meta-reference-quantized",
@ -30,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
provider_id="meta-reference-inference",
)
return DistributionTemplate(
name="meta-reference-quantized-gpu",
name=name,
distro_type="self_hosted",
description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -46,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: meta-reference-quantized-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -114,9 +114,9 @@ llama stack run ./run-with-safety.yaml \
### (Optional) Update Model Serving Configuration
> [!NOTE]
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
```{note}
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
```
To serve a new model with `ollama`
```bash

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -18,13 +19,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "ollama"
inference_provider = Provider(
provider_id="ollama",
provider_type="remote::ollama",
config=OllamaImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -36,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="ollama",
name=name,
distro_type="self_hosted",
description="Use (an external) Ollama server for running LLM inference",
docker_image=None,
@ -47,6 +56,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -54,7 +64,8 @@ def get_distribution_template() -> DistributionTemplate:
provider_overrides={
"inference": [
inference_provider,
]
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: ollama
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: ollama
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
}
name = "remote-vllm"
inference_provider = Provider(
provider_id="vllm-inference",
provider_type="remote::vllm",
@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
url="${env.VLLM_URL}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="remote-vllm",
name=name,
distro_type="self_hosted",
description="Use (an external) vLLM server for running LLM inference",
template_path=Path(__file__).parent / "doc_template.md",
@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -63,6 +70,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -44,36 +44,37 @@ class RunConfigSettings(BaseModel):
provider_configs[api_str] = api_providers
continue
provider_type = provider_types[0]
provider_id = provider_type.split("::")[-1]
provider_configs[api_str] = []
for provider_type in provider_types:
provider_id = provider_type.split("::")[-1]
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
api = Api(api_str)
if provider_type not in provider_registry[api]:
raise ValueError(
f"Unknown provider type: {provider_type} for API: {api_str}"
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if hasattr(config_class, "sample_run_config"):
config = config_class.sample_run_config(
__distro_dir__=f"distributions/{name}"
)
else:
config = {}
provider_configs[api_str].append(
Provider(
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
)
config_class = provider_registry[api][provider_type].config_class
assert (
config_class is not None
), f"No config class for provider type: {provider_type} for API: {api_str}"
config_class = instantiate_class_type(config_class)
if hasattr(config_class, "sample_run_config"):
config = config_class.sample_run_config(
__distro_dir__=f"distributions/{name}"
)
else:
config = {}
provider_configs[api_str] = [
Provider(
provider_id=provider_id,
provider_type=provider_type,
config=config,
)
]
# Get unique set of APIs from providers
apis = list(sorted(providers.keys()))

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: tgi
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: tgi
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -38,6 +41,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -7,6 +7,7 @@
from pathlib import Path
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -18,8 +19,11 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "tgi"
inference_provider = Provider(
provider_id="tgi-inference",
provider_type="remote::tgi",
@ -27,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
url="${env.TGI_URL}",
),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -38,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="tgi",
name=name,
distro_type="self_hosted",
description="Use (an external) TGI server for running LLM inference",
docker_image=None,
@ -49,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),
@ -64,6 +74,7 @@ def get_distribution_template() -> DistributionTemplate:
),
),
],
"memory": [memory_provider],
},
default_models=[
inference_model,

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: together
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -39,6 +42,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -9,6 +9,7 @@ from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.providers.remote.inference.together import TogetherImplConfig
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
@ -22,13 +23,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "together"
inference_provider = Provider(
provider_id="together",
provider_type="remote::together",
config=TogetherImplConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
core_model_to_hf_repo = {
m.descriptor(): m.huggingface_repo for m in all_registered_models()
@ -42,7 +51,7 @@ def get_distribution_template() -> DistributionTemplate:
]
return DistributionTemplate(
name="together",
name=name,
distro_type="self_hosted",
description="Use Together.AI for running LLM inference",
docker_image=None,
@ -53,6 +62,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=default_models,
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

View file

@ -16,4 +16,13 @@ distribution_spec:
- inline::meta-reference
telemetry:
- inline::meta-reference
eval:
- inline::meta-reference
datasetio:
- remote::huggingface
- inline::localfs
scoring:
- inline::basic
- inline::llm-as-judge
- inline::braintrust
image_type: conda

View file

@ -4,9 +4,12 @@ docker_image: null
conda_env: vllm-gpu
apis:
- agents
- datasetio
- eval
- inference
- memory
- safety
- scoring
- telemetry
providers:
inference:
@ -42,6 +45,27 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config: {}
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config: {}
- provider_id: localfs
provider_type: inline::localfs
config: {}
scoring:
- provider_id: basic
provider_type: inline::basic
config: {}
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
config: {}
- provider_id: braintrust
provider_type: inline::braintrust
config: {}
metadata_store:
namespace: null
type: sqlite

View file

@ -6,6 +6,7 @@
from llama_stack.distribution.datatypes import ModelInput, Provider
from llama_stack.providers.inline.inference.vllm import VLLMConfig
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
"safety": ["inline::llama-guard"],
"agents": ["inline::meta-reference"],
"telemetry": ["inline::meta-reference"],
"eval": ["inline::meta-reference"],
"datasetio": ["remote::huggingface", "inline::localfs"],
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
}
name = "vllm-gpu"
inference_provider = Provider(
provider_id="vllm",
provider_type="inline::vllm",
config=VLLMConfig.sample_run_config(),
)
memory_provider = Provider(
provider_id="faiss",
provider_type="inline::faiss",
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)
inference_model = ModelInput(
model_id="${env.INFERENCE_MODEL}",
@ -30,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
)
return DistributionTemplate(
name="vllm-gpu",
name=name,
distro_type="self_hosted",
description="Use a built-in vLLM engine for running LLM inference",
docker_image=None,
@ -41,6 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
"run.yaml": RunConfigSettings(
provider_overrides={
"inference": [inference_provider],
"memory": [memory_provider],
},
default_models=[inference_model],
),