mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
Add eval/scoring/datasetio API providers to distribution templates & UI developer guide (#564)
# What does this PR do? - add /eval, /scoring, /datasetio API providers to distribution templates - regenerate build.yaml / run.yaml files - fix `template.py` to take in list of providers instead of only first one - override memory provider as faiss default for all distro (as only 1 memory provider is needed to start basic flow, chromadb/pgvector need additional setup step). ``` python llama_stack/scripts/distro_codegen.py ``` - updated README to start UI via conda builds. ## Test Plan ``` python llama_stack/scripts/distro_codegen.py ``` - Use newly generated `run.yaml` to start server ``` llama stack run ./llama_stack/templates/together/run.yaml ``` <img width="1191" alt="image" src="https://github.com/user-attachments/assets/62f7d179-0cd0-427c-b6e8-e087d4648f09"> #### Registration ``` ❯ llama-stack-client datasets register \ --dataset-id "mmlu" \ --provider-id "huggingface" \ --url "https://huggingface.co/datasets/llamastack/evals" \ --metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \ --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}' ❯ llama-stack-client datasets list ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ ┃ identifier ┃ provider_id ┃ metadata ┃ type ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ │ mmlu │ huggingface │ {'path': 'llamastack/evals', 'name': │ dataset │ │ │ │ 'evals__mmlu__details', 'split': │ │ │ │ │ 'train'} │ │ └────────────┴─────────────┴─────────────────────────────────────────┴─────────┘ ``` ``` ❯ llama-stack-client datasets register \ --dataset-id "simpleqa" \ --provider-id "huggingface" \ --url "https://huggingface.co/datasets/llamastack/evals" \ --metadata '{"path": "llamastack/evals", "name": "evals__simpleqa", "split": "train"}' \ --schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": 
{"type": "string"}}}' ❯ llama-stack-client datasets list ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓ ┃ identifier ┃ provider_id ┃ metadata ┃ type ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩ │ mmlu │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__mmlu__details', │ dataset │ │ │ │ 'split': 'train'} │ │ │ simpleqa │ huggingface │ {'path': 'llamastack/evals', 'name': 'evals__simpleqa', │ dataset │ │ │ │ 'split': 'train'} │ │ └────────────┴─────────────┴───────────────────────────────────────────────────────────────┴─────────┘ ``` ``` ❯ llama-stack-client eval_tasks register \ > --eval-task-id meta-reference-mmlu \ > --provider-id meta-reference \ > --dataset-id mmlu \ > --scoring-functions basic::regex_parser_multiple_choice_answer ❯ llama-stack-client eval_tasks register \ --eval-task-id meta-reference-simpleqa \ --provider-id meta-reference \ --dataset-id simpleqa \ --scoring-functions llm-as-judge::405b-simpleqa ❯ llama-stack-client eval_tasks list ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┓ ┃ dataset_id ┃ identifier ┃ metadata ┃ provider_id ┃ provider_resour… ┃ scoring_functio… ┃ type ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━┩ │ mmlu │ meta-reference-… │ {} │ meta-reference │ meta-reference-… │ ['basic::regex_… │ eval_task │ │ simpleqa │ meta-reference-… │ {} │ meta-reference │ meta-reference-… │ ['llm-as-judge:… │ eval_task │ └────────────┴──────────────────┴──────────┴────────────────┴──────────────────┴──────────────────┴───────────┘ ``` #### Test with UI ``` streamlit run app.py ``` ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Ran pre-commit to handle lint / formatting issues. 
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests.
This commit is contained in:
parent
a4daf4d3ec
commit
7301403ce3
47 changed files with 841 additions and 195 deletions
|
@ -1,10 +1,12 @@
|
||||||
{
|
{
|
||||||
"tgi": [
|
"hf-serverless": [
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"fire",
|
"fire",
|
||||||
|
@ -13,6 +15,7 @@
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
|
"openai",
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -27,6 +30,66 @@
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
|
"together": [
|
||||||
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
|
"blobfile",
|
||||||
|
"chardet",
|
||||||
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"faiss-cpu",
|
||||||
|
"fastapi",
|
||||||
|
"fire",
|
||||||
|
"httpx",
|
||||||
|
"matplotlib",
|
||||||
|
"nltk",
|
||||||
|
"numpy",
|
||||||
|
"openai",
|
||||||
|
"pandas",
|
||||||
|
"pillow",
|
||||||
|
"psycopg2-binary",
|
||||||
|
"pypdf",
|
||||||
|
"redis",
|
||||||
|
"scikit-learn",
|
||||||
|
"scipy",
|
||||||
|
"sentencepiece",
|
||||||
|
"together",
|
||||||
|
"tqdm",
|
||||||
|
"transformers",
|
||||||
|
"uvicorn",
|
||||||
|
"sentence-transformers --no-deps",
|
||||||
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
|
],
|
||||||
|
"vllm-gpu": [
|
||||||
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
|
"blobfile",
|
||||||
|
"chardet",
|
||||||
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"faiss-cpu",
|
||||||
|
"fastapi",
|
||||||
|
"fire",
|
||||||
|
"httpx",
|
||||||
|
"matplotlib",
|
||||||
|
"nltk",
|
||||||
|
"numpy",
|
||||||
|
"openai",
|
||||||
|
"pandas",
|
||||||
|
"pillow",
|
||||||
|
"psycopg2-binary",
|
||||||
|
"pypdf",
|
||||||
|
"redis",
|
||||||
|
"scikit-learn",
|
||||||
|
"scipy",
|
||||||
|
"sentencepiece",
|
||||||
|
"tqdm",
|
||||||
|
"transformers",
|
||||||
|
"uvicorn",
|
||||||
|
"vllm",
|
||||||
|
"sentence-transformers --no-deps",
|
||||||
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
|
],
|
||||||
"remote-vllm": [
|
"remote-vllm": [
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
|
@ -54,18 +117,22 @@
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
"vllm-gpu": [
|
"fireworks": [
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"fire",
|
"fire",
|
||||||
|
"fireworks-ai",
|
||||||
"httpx",
|
"httpx",
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
|
"openai",
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -77,82 +144,17 @@
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"transformers",
|
"transformers",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
"vllm",
|
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
"meta-reference-quantized-gpu": [
|
"tgi": [
|
||||||
"accelerate",
|
|
||||||
"aiosqlite",
|
|
||||||
"blobfile",
|
|
||||||
"chardet",
|
|
||||||
"chromadb-client",
|
|
||||||
"fairscale",
|
|
||||||
"faiss-cpu",
|
|
||||||
"fastapi",
|
|
||||||
"fbgemm-gpu",
|
|
||||||
"fire",
|
|
||||||
"httpx",
|
|
||||||
"lm-format-enforcer",
|
|
||||||
"matplotlib",
|
|
||||||
"nltk",
|
|
||||||
"numpy",
|
|
||||||
"pandas",
|
|
||||||
"pillow",
|
|
||||||
"psycopg2-binary",
|
|
||||||
"pypdf",
|
|
||||||
"redis",
|
|
||||||
"scikit-learn",
|
|
||||||
"scipy",
|
|
||||||
"sentencepiece",
|
|
||||||
"torch",
|
|
||||||
"torchao==0.5.0",
|
|
||||||
"torchvision",
|
|
||||||
"tqdm",
|
|
||||||
"transformers",
|
|
||||||
"uvicorn",
|
|
||||||
"zmq",
|
|
||||||
"sentence-transformers --no-deps",
|
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
|
||||||
],
|
|
||||||
"meta-reference-gpu": [
|
|
||||||
"accelerate",
|
|
||||||
"aiosqlite",
|
|
||||||
"blobfile",
|
|
||||||
"chardet",
|
|
||||||
"chromadb-client",
|
|
||||||
"fairscale",
|
|
||||||
"faiss-cpu",
|
|
||||||
"fastapi",
|
|
||||||
"fire",
|
|
||||||
"httpx",
|
|
||||||
"lm-format-enforcer",
|
|
||||||
"matplotlib",
|
|
||||||
"nltk",
|
|
||||||
"numpy",
|
|
||||||
"pandas",
|
|
||||||
"pillow",
|
|
||||||
"psycopg2-binary",
|
|
||||||
"pypdf",
|
|
||||||
"redis",
|
|
||||||
"scikit-learn",
|
|
||||||
"scipy",
|
|
||||||
"sentencepiece",
|
|
||||||
"torch",
|
|
||||||
"torchvision",
|
|
||||||
"tqdm",
|
|
||||||
"transformers",
|
|
||||||
"uvicorn",
|
|
||||||
"zmq",
|
|
||||||
"sentence-transformers --no-deps",
|
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
|
||||||
],
|
|
||||||
"hf-serverless": [
|
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"fire",
|
"fire",
|
||||||
|
@ -161,61 +163,7 @@
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pandas",
|
"openai",
|
||||||
"pillow",
|
|
||||||
"psycopg2-binary",
|
|
||||||
"pypdf",
|
|
||||||
"redis",
|
|
||||||
"scikit-learn",
|
|
||||||
"scipy",
|
|
||||||
"sentencepiece",
|
|
||||||
"tqdm",
|
|
||||||
"transformers",
|
|
||||||
"uvicorn",
|
|
||||||
"sentence-transformers --no-deps",
|
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
|
||||||
],
|
|
||||||
"together": [
|
|
||||||
"aiosqlite",
|
|
||||||
"blobfile",
|
|
||||||
"chardet",
|
|
||||||
"chromadb-client",
|
|
||||||
"faiss-cpu",
|
|
||||||
"fastapi",
|
|
||||||
"fire",
|
|
||||||
"httpx",
|
|
||||||
"matplotlib",
|
|
||||||
"nltk",
|
|
||||||
"numpy",
|
|
||||||
"pandas",
|
|
||||||
"pillow",
|
|
||||||
"psycopg2-binary",
|
|
||||||
"pypdf",
|
|
||||||
"redis",
|
|
||||||
"scikit-learn",
|
|
||||||
"scipy",
|
|
||||||
"sentencepiece",
|
|
||||||
"together",
|
|
||||||
"tqdm",
|
|
||||||
"transformers",
|
|
||||||
"uvicorn",
|
|
||||||
"sentence-transformers --no-deps",
|
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
|
||||||
],
|
|
||||||
"ollama": [
|
|
||||||
"aiohttp",
|
|
||||||
"aiosqlite",
|
|
||||||
"blobfile",
|
|
||||||
"chardet",
|
|
||||||
"chromadb-client",
|
|
||||||
"faiss-cpu",
|
|
||||||
"fastapi",
|
|
||||||
"fire",
|
|
||||||
"httpx",
|
|
||||||
"matplotlib",
|
|
||||||
"nltk",
|
|
||||||
"numpy",
|
|
||||||
"ollama",
|
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -232,10 +180,12 @@
|
||||||
],
|
],
|
||||||
"bedrock": [
|
"bedrock": [
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"boto3",
|
"boto3",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"fire",
|
"fire",
|
||||||
|
@ -243,6 +193,7 @@
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
|
"openai",
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -257,20 +208,24 @@
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
"hf-endpoint": [
|
"meta-reference-gpu": [
|
||||||
"aiohttp",
|
"accelerate",
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"fairscale",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
"fire",
|
"fire",
|
||||||
"httpx",
|
"httpx",
|
||||||
"huggingface_hub",
|
"lm-format-enforcer",
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
|
"openai",
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -279,25 +234,34 @@
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"scipy",
|
"scipy",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
|
"torch",
|
||||||
|
"torchvision",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"transformers",
|
"transformers",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
|
"zmq",
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
"fireworks": [
|
"meta-reference-quantized-gpu": [
|
||||||
|
"accelerate",
|
||||||
"aiosqlite",
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
"blobfile",
|
"blobfile",
|
||||||
"chardet",
|
"chardet",
|
||||||
"chromadb-client",
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"fairscale",
|
||||||
"faiss-cpu",
|
"faiss-cpu",
|
||||||
"fastapi",
|
"fastapi",
|
||||||
|
"fbgemm-gpu",
|
||||||
"fire",
|
"fire",
|
||||||
"fireworks-ai",
|
|
||||||
"httpx",
|
"httpx",
|
||||||
|
"lm-format-enforcer",
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"nltk",
|
"nltk",
|
||||||
"numpy",
|
"numpy",
|
||||||
|
"openai",
|
||||||
"pandas",
|
"pandas",
|
||||||
"pillow",
|
"pillow",
|
||||||
"psycopg2-binary",
|
"psycopg2-binary",
|
||||||
|
@ -306,9 +270,13 @@
|
||||||
"scikit-learn",
|
"scikit-learn",
|
||||||
"scipy",
|
"scipy",
|
||||||
"sentencepiece",
|
"sentencepiece",
|
||||||
|
"torch",
|
||||||
|
"torchao==0.5.0",
|
||||||
|
"torchvision",
|
||||||
"tqdm",
|
"tqdm",
|
||||||
"transformers",
|
"transformers",
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
|
"zmq",
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
],
|
],
|
||||||
|
@ -337,5 +305,67 @@
|
||||||
"uvicorn",
|
"uvicorn",
|
||||||
"sentence-transformers --no-deps",
|
"sentence-transformers --no-deps",
|
||||||
"torch --index-url https://download.pytorch.org/whl/cpu"
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
|
],
|
||||||
|
"ollama": [
|
||||||
|
"aiohttp",
|
||||||
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
|
"blobfile",
|
||||||
|
"chardet",
|
||||||
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"faiss-cpu",
|
||||||
|
"fastapi",
|
||||||
|
"fire",
|
||||||
|
"httpx",
|
||||||
|
"matplotlib",
|
||||||
|
"nltk",
|
||||||
|
"numpy",
|
||||||
|
"ollama",
|
||||||
|
"openai",
|
||||||
|
"pandas",
|
||||||
|
"pillow",
|
||||||
|
"psycopg2-binary",
|
||||||
|
"pypdf",
|
||||||
|
"redis",
|
||||||
|
"scikit-learn",
|
||||||
|
"scipy",
|
||||||
|
"sentencepiece",
|
||||||
|
"tqdm",
|
||||||
|
"transformers",
|
||||||
|
"uvicorn",
|
||||||
|
"sentence-transformers --no-deps",
|
||||||
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
|
],
|
||||||
|
"hf-endpoint": [
|
||||||
|
"aiohttp",
|
||||||
|
"aiosqlite",
|
||||||
|
"autoevals",
|
||||||
|
"blobfile",
|
||||||
|
"chardet",
|
||||||
|
"chromadb-client",
|
||||||
|
"datasets",
|
||||||
|
"faiss-cpu",
|
||||||
|
"fastapi",
|
||||||
|
"fire",
|
||||||
|
"httpx",
|
||||||
|
"huggingface_hub",
|
||||||
|
"matplotlib",
|
||||||
|
"nltk",
|
||||||
|
"numpy",
|
||||||
|
"openai",
|
||||||
|
"pandas",
|
||||||
|
"pillow",
|
||||||
|
"psycopg2-binary",
|
||||||
|
"pypdf",
|
||||||
|
"redis",
|
||||||
|
"scikit-learn",
|
||||||
|
"scipy",
|
||||||
|
"sentencepiece",
|
||||||
|
"tqdm",
|
||||||
|
"transformers",
|
||||||
|
"uvicorn",
|
||||||
|
"sentence-transformers --no-deps",
|
||||||
|
"torch --index-url https://download.pytorch.org/whl/cpu"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,3 @@
|
||||||
---
|
|
||||||
orphan: true
|
|
||||||
---
|
|
||||||
# Bedrock Distribution
|
# Bedrock Distribution
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
|
@ -15,9 +12,12 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::bedrock` |
|
| inference | `remote::bedrock` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `remote::bedrock` |
|
| safety | `remote::bedrock` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,12 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::fireworks` |
|
| inference | `remote::fireworks` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `inline::meta-reference` |
|
| inference | `inline::meta-reference` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,12 @@ The `llamastack/distribution-meta-reference-quantized-gpu` distribution consists
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `inline::meta-reference-quantized` |
|
| inference | `inline::meta-reference-quantized` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,12 @@ The `llamastack/distribution-ollama` distribution consists of the following prov
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::ollama` |
|
| inference | `remote::ollama` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
@ -119,7 +122,7 @@ llama stack run ./run-with-safety.yaml \
|
||||||
### (Optional) Update Model Serving Configuration
|
### (Optional) Update Model Serving Configuration
|
||||||
|
|
||||||
```{note}
|
```{note}
|
||||||
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) variable for supported Ollama models.
|
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
|
||||||
```
|
```
|
||||||
|
|
||||||
To serve a new model with `ollama`
|
To serve a new model with `ollama`
|
||||||
|
|
|
@ -16,9 +16,12 @@ The `llamastack/distribution-tgi` distribution consists of the following provide
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::tgi` |
|
| inference | `remote::tgi` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,12 @@ The `llamastack/distribution-together` distribution consists of the following pr
|
||||||
| API | Provider(s) |
|
| API | Provider(s) |
|
||||||
|-----|-------------|
|
|-----|-------------|
|
||||||
| agents | `inline::meta-reference` |
|
| agents | `inline::meta-reference` |
|
||||||
|
| datasetio | `remote::huggingface`, `inline::localfs` |
|
||||||
|
| eval | `inline::meta-reference` |
|
||||||
| inference | `remote::together` |
|
| inference | `remote::together` |
|
||||||
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
|
||||||
| safety | `inline::llama-guard` |
|
| safety | `inline::llama-guard` |
|
||||||
|
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
|
||||||
| telemetry | `inline::meta-reference` |
|
| telemetry | `inline::meta-reference` |
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,41 @@
|
||||||
# LLama Stack UI
|
# (Experimental) Llama Stack UI
|
||||||
|
|
||||||
[!NOTE] This is a work in progress.
|
## Docker Setup
|
||||||
|
|
||||||
## Prerequisite
|
:warning: This is a work in progress.
|
||||||
- Start up Llama Stack Server
|
|
||||||
```
|
|
||||||
llama stack run
|
|
||||||
```
|
|
||||||
|
|
||||||
## Running Streamlit App
|
## Developer Setup
|
||||||
|
|
||||||
|
1. Start up the Llama Stack API server. See the [getting started guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) for more details.
|
||||||
|
|
||||||
```
|
```
|
||||||
|
llama stack build --template together --image-type conda
|
||||||
|
|
||||||
|
llama stack run together
|
||||||
|
```
|
||||||
|
|
||||||
|
2. (Optional) Register datasets and eval tasks as resources if you want to run pre-configured evaluation flows (e.g. the Evaluations (Generation + Scoring) page).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ llama-stack-client datasets register \
|
||||||
|
--dataset-id "mmlu" \
|
||||||
|
--provider-id "huggingface" \
|
||||||
|
--url "https://huggingface.co/datasets/llamastack/evals" \
|
||||||
|
--metadata '{"path": "llamastack/evals", "name": "evals__mmlu__details", "split": "train"}' \
|
||||||
|
--schema '{"input_query": {"type": "string"}, "expected_answer": {"type": "string", "chat_completion_input": {"type": "string"}}}'
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ llama-stack-client eval_tasks register \
|
||||||
|
--eval-task-id meta-reference-mmlu \
|
||||||
|
--provider-id meta-reference \
|
||||||
|
--dataset-id mmlu \
|
||||||
|
--scoring-functions basic::regex_parser_multiple_choice_answer
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Start Streamlit UI
|
||||||
|
|
||||||
|
```bash
|
||||||
cd llama_stack/distribution/ui
|
cd llama_stack/distribution/ui
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
streamlit run app.py
|
streamlit run app.py
|
||||||
|
|
|
@ -6,6 +6,9 @@
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from llama_stack.distribution.datatypes import Provider
|
||||||
|
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,10 +19,19 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["remote::bedrock"],
|
"safety": ["remote::bedrock"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "bedrock"
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="bedrock",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use AWS Bedrock for running LLM inference and safety",
|
description="Use AWS Bedrock for running LLM inference and safety",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -27,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
providers=providers,
|
providers=providers,
|
||||||
default_models=[],
|
default_models=[],
|
||||||
run_configs={
|
run_configs={
|
||||||
"run.yaml": RunConfigSettings(),
|
"run.yaml": RunConfigSettings(
|
||||||
|
provider_overrides={
|
||||||
|
"memory": [memory_provider],
|
||||||
|
},
|
||||||
|
),
|
||||||
},
|
},
|
||||||
run_config_env_vars={
|
run_config_env_vars={
|
||||||
"LLAMASTACK_PORT": (
|
"LLAMASTACK_PORT": (
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: bedrock
|
conda_env: bedrock
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -37,6 +40,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -9,6 +9,7 @@ from pathlib import Path
|
||||||
from llama_models.sku_list import all_registered_models
|
from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
|
||||||
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
|
||||||
|
|
||||||
|
@ -22,13 +23,23 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
name = "fireworks"
|
||||||
|
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="fireworks",
|
provider_id="fireworks",
|
||||||
provider_type="remote::fireworks",
|
provider_type="remote::fireworks",
|
||||||
config=FireworksImplConfig.sample_run_config(),
|
config=FireworksImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
core_model_to_hf_repo = {
|
core_model_to_hf_repo = {
|
||||||
m.descriptor(): m.huggingface_repo for m in all_registered_models()
|
m.descriptor(): m.huggingface_repo for m in all_registered_models()
|
||||||
|
@ -42,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
]
|
]
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="fireworks",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use Fireworks.AI for running LLM inference",
|
description="Use Fireworks.AI for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -53,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models,
|
default_models=default_models,
|
||||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: fireworks
|
conda_env: fireworks
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -39,6 +42,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
|
from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "hf-endpoint"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="hf-endpoint",
|
provider_id="hf-endpoint",
|
||||||
provider_type="remote::hf::endpoint",
|
provider_type="remote::hf::endpoint",
|
||||||
config=InferenceEndpointImplConfig.sample_run_config(),
|
config=InferenceEndpointImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -34,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="hf-endpoint",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
|
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -45,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -59,7 +69,8 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
|
endpoint_name="${env.SAFETY_INFERENCE_ENDPOINT_NAME}",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
]
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: hf-endpoint
|
conda_env: hf-endpoint
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -44,6 +47,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: hf-endpoint
|
conda_env: hf-endpoint
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -39,6 +42,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -16,13 +17,22 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
name = "hf-serverless"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="hf-serverless",
|
provider_id="hf-serverless",
|
||||||
provider_type="remote::hf::serverless",
|
provider_type="remote::hf::serverless",
|
||||||
config=InferenceAPIImplConfig.sample_run_config(),
|
config=InferenceAPIImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -34,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="hf-serverless",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
|
description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -45,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -59,7 +70,8 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
repo="${env.SAFETY_MODEL}",
|
repo="${env.SAFETY_MODEL}",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
]
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: hf-serverless
|
conda_env: hf-serverless
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -44,6 +47,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: hf-serverless
|
conda_env: hf-serverless
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -39,6 +42,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
from llama_stack.providers.inline.inference.meta_reference import (
|
from llama_stack.providers.inline.inference.meta_reference import (
|
||||||
MetaReferenceInferenceConfig,
|
MetaReferenceInferenceConfig,
|
||||||
)
|
)
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "meta-reference-gpu"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="meta-reference-inference",
|
provider_id="meta-reference-inference",
|
||||||
provider_type="inline::meta-reference",
|
provider_type="inline::meta-reference",
|
||||||
|
@ -30,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
|
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -41,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="meta-reference-gpu",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use Meta Reference for running LLM inference",
|
description="Use Meta Reference for running LLM inference",
|
||||||
template_path=Path(__file__).parent / "doc_template.md",
|
template_path=Path(__file__).parent / "doc_template.md",
|
||||||
|
@ -51,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -67,6 +77,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: meta-reference-gpu
|
conda_env: meta-reference-gpu
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -46,6 +49,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: meta-reference-gpu
|
conda_env: meta-reference-gpu
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -40,6 +43,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
|
||||||
from llama_stack.providers.inline.inference.meta_reference import (
|
from llama_stack.providers.inline.inference.meta_reference import (
|
||||||
MetaReferenceQuantizedInferenceConfig,
|
MetaReferenceQuantizedInferenceConfig,
|
||||||
)
|
)
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
|
@ -20,8 +21,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "meta-reference-quantized-gpu"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="meta-reference-inference",
|
provider_id="meta-reference-inference",
|
||||||
provider_type="inline::meta-reference-quantized",
|
provider_type="inline::meta-reference-quantized",
|
||||||
|
@ -30,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
|
checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
provider_id="meta-reference-inference",
|
provider_id="meta-reference-inference",
|
||||||
)
|
)
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="meta-reference-quantized-gpu",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
|
description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
|
||||||
template_path=Path(__file__).parent / "doc_template.md",
|
template_path=Path(__file__).parent / "doc_template.md",
|
||||||
|
@ -46,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: meta-reference-quantized-gpu
|
conda_env: meta-reference-quantized-gpu
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -42,6 +45,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -114,9 +114,9 @@ llama stack run ./run-with-safety.yaml \
|
||||||
|
|
||||||
### (Optional) Update Model Serving Configuration
|
### (Optional) Update Model Serving Configuration
|
||||||
|
|
||||||
> [!NOTE]
|
```{note}
|
||||||
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
|
Please check the [model_aliases](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py#L45) for the supported Ollama models.
|
||||||
|
```
|
||||||
|
|
||||||
To serve a new model with `ollama`
|
To serve a new model with `ollama`
|
||||||
```bash
|
```bash
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
from llama_stack.providers.remote.inference.ollama import OllamaImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -18,13 +19,21 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "ollama"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="ollama",
|
provider_id="ollama",
|
||||||
provider_type="remote::ollama",
|
provider_type="remote::ollama",
|
||||||
config=OllamaImplConfig.sample_run_config(),
|
config=OllamaImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -36,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="ollama",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use (an external) Ollama server for running LLM inference",
|
description="Use (an external) Ollama server for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -47,6 +56,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -54,7 +64,8 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [
|
"inference": [
|
||||||
inference_provider,
|
inference_provider,
|
||||||
]
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: ollama
|
conda_env: ollama
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -38,6 +41,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: ollama
|
conda_env: ollama
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -38,6 +41,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
}
|
}
|
||||||
|
name = "remote-vllm"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="vllm-inference",
|
provider_id="vllm-inference",
|
||||||
provider_type="remote::vllm",
|
provider_type="remote::vllm",
|
||||||
|
@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
url="${env.VLLM_URL}",
|
url="${env.VLLM_URL}",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="remote-vllm",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use (an external) vLLM server for running LLM inference",
|
description="Use (an external) vLLM server for running LLM inference",
|
||||||
template_path=Path(__file__).parent / "doc_template.md",
|
template_path=Path(__file__).parent / "doc_template.md",
|
||||||
|
@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -63,6 +70,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -44,36 +44,37 @@ class RunConfigSettings(BaseModel):
|
||||||
provider_configs[api_str] = api_providers
|
provider_configs[api_str] = api_providers
|
||||||
continue
|
continue
|
||||||
|
|
||||||
provider_type = provider_types[0]
|
provider_configs[api_str] = []
|
||||||
provider_id = provider_type.split("::")[-1]
|
for provider_type in provider_types:
|
||||||
|
provider_id = provider_type.split("::")[-1]
|
||||||
|
|
||||||
api = Api(api_str)
|
api = Api(api_str)
|
||||||
if provider_type not in provider_registry[api]:
|
if provider_type not in provider_registry[api]:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unknown provider type: {provider_type} for API: {api_str}"
|
f"Unknown provider type: {provider_type} for API: {api_str}"
|
||||||
|
)
|
||||||
|
|
||||||
|
config_class = provider_registry[api][provider_type].config_class
|
||||||
|
assert (
|
||||||
|
config_class is not None
|
||||||
|
), f"No config class for provider type: {provider_type} for API: {api_str}"
|
||||||
|
|
||||||
|
config_class = instantiate_class_type(config_class)
|
||||||
|
if hasattr(config_class, "sample_run_config"):
|
||||||
|
config = config_class.sample_run_config(
|
||||||
|
__distro_dir__=f"distributions/{name}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
config = {}
|
||||||
|
|
||||||
|
provider_configs[api_str].append(
|
||||||
|
Provider(
|
||||||
|
provider_id=provider_id,
|
||||||
|
provider_type=provider_type,
|
||||||
|
config=config,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
config_class = provider_registry[api][provider_type].config_class
|
|
||||||
assert (
|
|
||||||
config_class is not None
|
|
||||||
), f"No config class for provider type: {provider_type} for API: {api_str}"
|
|
||||||
|
|
||||||
config_class = instantiate_class_type(config_class)
|
|
||||||
if hasattr(config_class, "sample_run_config"):
|
|
||||||
config = config_class.sample_run_config(
|
|
||||||
__distro_dir__=f"distributions/{name}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
config = {}
|
|
||||||
|
|
||||||
provider_configs[api_str] = [
|
|
||||||
Provider(
|
|
||||||
provider_id=provider_id,
|
|
||||||
provider_type=provider_type,
|
|
||||||
config=config,
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
# Get unique set of APIs from providers
|
# Get unique set of APIs from providers
|
||||||
apis = list(sorted(providers.keys()))
|
apis = list(sorted(providers.keys()))
|
||||||
|
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: tgi
|
conda_env: tgi
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -42,6 +45,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: tgi
|
conda_env: tgi
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -38,6 +41,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
from llama_stack.providers.remote.inference.tgi import TGIImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
@ -18,8 +19,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "tgi"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="tgi-inference",
|
provider_id="tgi-inference",
|
||||||
provider_type="remote::tgi",
|
provider_type="remote::tgi",
|
||||||
|
@ -27,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
url="${env.TGI_URL}",
|
url="${env.TGI_URL}",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -38,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="tgi",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use (an external) TGI server for running LLM inference",
|
description="Use (an external) TGI server for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -49,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
@ -64,6 +74,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[
|
default_models=[
|
||||||
inference_model,
|
inference_model,
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: together
|
conda_env: together
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -39,6 +42,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -9,6 +9,7 @@ from pathlib import Path
|
||||||
from llama_models.sku_list import all_registered_models
|
from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
from llama_stack.providers.remote.inference.together import TogetherImplConfig
|
||||||
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
|
from llama_stack.providers.remote.inference.together.together import MODEL_ALIASES
|
||||||
|
|
||||||
|
@ -22,13 +23,21 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "together"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="together",
|
provider_id="together",
|
||||||
provider_type="remote::together",
|
provider_type="remote::together",
|
||||||
config=TogetherImplConfig.sample_run_config(),
|
config=TogetherImplConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
core_model_to_hf_repo = {
|
core_model_to_hf_repo = {
|
||||||
m.descriptor(): m.huggingface_repo for m in all_registered_models()
|
m.descriptor(): m.huggingface_repo for m in all_registered_models()
|
||||||
|
@ -42,7 +51,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
]
|
]
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="together",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use Together.AI for running LLM inference",
|
description="Use Together.AI for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -53,6 +62,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=default_models,
|
default_models=default_models,
|
||||||
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],
|
||||||
|
|
|
@ -16,4 +16,13 @@ distribution_spec:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
telemetry:
|
telemetry:
|
||||||
- inline::meta-reference
|
- inline::meta-reference
|
||||||
|
eval:
|
||||||
|
- inline::meta-reference
|
||||||
|
datasetio:
|
||||||
|
- remote::huggingface
|
||||||
|
- inline::localfs
|
||||||
|
scoring:
|
||||||
|
- inline::basic
|
||||||
|
- inline::llm-as-judge
|
||||||
|
- inline::braintrust
|
||||||
image_type: conda
|
image_type: conda
|
||||||
|
|
|
@ -4,9 +4,12 @@ docker_image: null
|
||||||
conda_env: vllm-gpu
|
conda_env: vllm-gpu
|
||||||
apis:
|
apis:
|
||||||
- agents
|
- agents
|
||||||
|
- datasetio
|
||||||
|
- eval
|
||||||
- inference
|
- inference
|
||||||
- memory
|
- memory
|
||||||
- safety
|
- safety
|
||||||
|
- scoring
|
||||||
- telemetry
|
- telemetry
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
|
@ -42,6 +45,27 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config: {}
|
config: {}
|
||||||
|
eval:
|
||||||
|
- provider_id: meta-reference
|
||||||
|
provider_type: inline::meta-reference
|
||||||
|
config: {}
|
||||||
|
datasetio:
|
||||||
|
- provider_id: huggingface
|
||||||
|
provider_type: remote::huggingface
|
||||||
|
config: {}
|
||||||
|
- provider_id: localfs
|
||||||
|
provider_type: inline::localfs
|
||||||
|
config: {}
|
||||||
|
scoring:
|
||||||
|
- provider_id: basic
|
||||||
|
provider_type: inline::basic
|
||||||
|
config: {}
|
||||||
|
- provider_id: llm-as-judge
|
||||||
|
provider_type: inline::llm-as-judge
|
||||||
|
config: {}
|
||||||
|
- provider_id: braintrust
|
||||||
|
provider_type: inline::braintrust
|
||||||
|
config: {}
|
||||||
metadata_store:
|
metadata_store:
|
||||||
namespace: null
|
namespace: null
|
||||||
type: sqlite
|
type: sqlite
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider
|
from llama_stack.distribution.datatypes import ModelInput, Provider
|
||||||
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
from llama_stack.providers.inline.inference.vllm import VLLMConfig
|
||||||
|
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
|
||||||
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
|
||||||
|
|
||||||
|
|
||||||
|
@ -16,13 +17,21 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"safety": ["inline::llama-guard"],
|
"safety": ["inline::llama-guard"],
|
||||||
"agents": ["inline::meta-reference"],
|
"agents": ["inline::meta-reference"],
|
||||||
"telemetry": ["inline::meta-reference"],
|
"telemetry": ["inline::meta-reference"],
|
||||||
|
"eval": ["inline::meta-reference"],
|
||||||
|
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||||
|
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||||
}
|
}
|
||||||
|
name = "vllm-gpu"
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
provider_id="vllm",
|
provider_id="vllm",
|
||||||
provider_type="inline::vllm",
|
provider_type="inline::vllm",
|
||||||
config=VLLMConfig.sample_run_config(),
|
config=VLLMConfig.sample_run_config(),
|
||||||
)
|
)
|
||||||
|
memory_provider = Provider(
|
||||||
|
provider_id="faiss",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
|
||||||
|
)
|
||||||
|
|
||||||
inference_model = ModelInput(
|
inference_model = ModelInput(
|
||||||
model_id="${env.INFERENCE_MODEL}",
|
model_id="${env.INFERENCE_MODEL}",
|
||||||
|
@ -30,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
)
|
)
|
||||||
|
|
||||||
return DistributionTemplate(
|
return DistributionTemplate(
|
||||||
name="vllm-gpu",
|
name=name,
|
||||||
distro_type="self_hosted",
|
distro_type="self_hosted",
|
||||||
description="Use a built-in vLLM engine for running LLM inference",
|
description="Use a built-in vLLM engine for running LLM inference",
|
||||||
docker_image=None,
|
docker_image=None,
|
||||||
|
@ -41,6 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": [inference_provider],
|
"inference": [inference_provider],
|
||||||
|
"memory": [memory_provider],
|
||||||
},
|
},
|
||||||
default_models=[inference_model],
|
default_models=[inference_model],
|
||||||
),
|
),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue