mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-25 21:57:45 +00:00
feat: re-work distro-codegen
Each *.py file in the various templates now has to use `Provider` objects rather than the stringified provider_types in the DistributionTemplate. Adjust the templates accordingly, then regenerate all templates, docs, etc. Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
dcc6b1eee9
commit
776fabed9e
28 changed files with 809 additions and 328 deletions
|
@ -3,57 +3,98 @@ distribution_spec:
|
|||
description: CI tests for Llama Stack
|
||||
providers:
|
||||
inference:
|
||||
- remote::cerebras
|
||||
- remote::ollama
|
||||
- remote::vllm
|
||||
- remote::tgi
|
||||
- remote::hf::serverless
|
||||
- remote::hf::endpoint
|
||||
- remote::fireworks
|
||||
- remote::together
|
||||
- remote::bedrock
|
||||
- remote::databricks
|
||||
- remote::nvidia
|
||||
- remote::runpod
|
||||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::groq
|
||||
- remote::llama-openai-compat
|
||||
- remote::sambanova
|
||||
- remote::passthrough
|
||||
- inline::sentence-transformers
|
||||
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
|
||||
provider_type: remote::cerebras
|
||||
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
|
||||
provider_type: remote::ollama
|
||||
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
|
||||
provider_type: remote::vllm
|
||||
- provider_id: ${env.ENABLE_TGI:=__disabled__}
|
||||
provider_type: remote::tgi
|
||||
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
|
||||
provider_type: remote::hf::serverless
|
||||
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
|
||||
provider_type: remote::hf::endpoint
|
||||
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
|
||||
provider_type: remote::fireworks
|
||||
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
|
||||
provider_type: remote::together
|
||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
||||
provider_type: remote::bedrock
|
||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
||||
provider_type: remote::databricks
|
||||
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
|
||||
provider_type: remote::nvidia
|
||||
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
|
||||
provider_type: remote::runpod
|
||||
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
|
||||
provider_type: remote::openai
|
||||
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
|
||||
provider_type: remote::anthropic
|
||||
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
||||
provider_type: remote::gemini
|
||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
provider_type: remote::groq
|
||||
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
|
||||
provider_type: remote::llama-openai-compat
|
||||
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
|
||||
provider_type: remote::sambanova
|
||||
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
|
||||
provider_type: remote::passthrough
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- inline::sqlite-vec
|
||||
- inline::milvus
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
||||
provider_type: inline::faiss
|
||||
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
|
||||
provider_type: inline::sqlite-vec
|
||||
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
|
||||
provider_type: inline::milvus
|
||||
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
|
||||
provider_type: remote::chromadb
|
||||
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
|
||||
provider_type: remote::pgvector
|
||||
files:
|
||||
- inline::localfs
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
post_training:
|
||||
- inline::huggingface
|
||||
- provider_id: huggingface
|
||||
provider_type: inline::huggingface
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: ci-tests
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- asyncpg
|
||||
|
|
|
@ -56,7 +56,6 @@ providers:
|
|||
api_key: ${env.TOGETHER_API_KEY}
|
||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
||||
provider_type: remote::bedrock
|
||||
config: {}
|
||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
||||
provider_type: remote::databricks
|
||||
config:
|
||||
|
@ -107,7 +106,6 @@ providers:
|
|||
api_key: ${env.PASSTHROUGH_API_KEY}
|
||||
- provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
||||
provider_type: inline::faiss
|
||||
|
@ -208,10 +206,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -229,10 +225,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db
|
||||
|
|
|
@ -4,32 +4,50 @@ distribution_spec:
|
|||
container
|
||||
providers:
|
||||
inference:
|
||||
- remote::tgi
|
||||
- inline::sentence-transformers
|
||||
- provider_id: tgi
|
||||
provider_type: remote::tgi
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
- provider_id: pgvector
|
||||
provider_type: remote::pgvector
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
image_type: conda
|
||||
image_name: dell
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -19,18 +19,32 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::tgi", "inline::sentence-transformers"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"inference": [
|
||||
Provider(provider_id="tgi", provider_type="remote::tgi"),
|
||||
Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
|
||||
],
|
||||
"vector_io": [
|
||||
Provider(provider_id="faiss", provider_type="inline::faiss"),
|
||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
||||
Provider(provider_id="pgvector", provider_type="remote::pgvector"),
|
||||
],
|
||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
Provider(provider_id="huggingface", provider_type="remote::huggingface"),
|
||||
Provider(provider_id="localfs", provider_type="inline::localfs"),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(provider_id="basic", provider_type="inline::basic"),
|
||||
Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"),
|
||||
Provider(provider_id="braintrust", provider_type="inline::braintrust"),
|
||||
],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
||||
],
|
||||
}
|
||||
name = "dell"
|
||||
|
|
|
@ -22,7 +22,6 @@ providers:
|
|||
url: ${env.DEH_SAFETY_URL}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
|
@ -74,10 +73,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -95,7 +92,6 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
|
||||
|
|
|
@ -18,7 +18,6 @@ providers:
|
|||
url: ${env.DEH_URL}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
|
@ -70,10 +69,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -91,7 +88,6 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db
|
||||
|
|
|
@ -3,32 +3,50 @@ distribution_spec:
|
|||
description: Use Meta Reference for running LLM inference
|
||||
providers:
|
||||
inference:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
- provider_id: pgvector
|
||||
provider_type: remote::pgvector
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: meta-reference-gpu
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -25,19 +25,91 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["inline::meta-reference"],
|
||||
"vector_io": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"inference": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"vector_io": [
|
||||
Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
),
|
||||
Provider(
|
||||
provider_id="chromadb",
|
||||
provider_type="remote::chromadb",
|
||||
),
|
||||
Provider(
|
||||
provider_id="pgvector",
|
||||
provider_type="remote::pgvector",
|
||||
),
|
||||
],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
provider_type="inline::llama-guard",
|
||||
)
|
||||
],
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"telemetry": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"eval": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"datasetio": [
|
||||
Provider(
|
||||
provider_id="huggingface",
|
||||
provider_type="remote::huggingface",
|
||||
),
|
||||
Provider(
|
||||
provider_id="localfs",
|
||||
provider_type="inline::localfs",
|
||||
),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(
|
||||
provider_id="basic",
|
||||
provider_type="inline::basic",
|
||||
),
|
||||
Provider(
|
||||
provider_id="llm-as-judge",
|
||||
provider_type="inline::llm-as-judge",
|
||||
),
|
||||
Provider(
|
||||
provider_id="braintrust",
|
||||
provider_type="inline::braintrust",
|
||||
),
|
||||
],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
Provider(
|
||||
provider_id="brave-search",
|
||||
provider_type="remote::brave-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="tavily-search",
|
||||
provider_type="remote::tavily-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="rag-runtime",
|
||||
provider_type="inline::rag-runtime",
|
||||
),
|
||||
Provider(
|
||||
provider_id="model-context-protocol",
|
||||
provider_type="remote::model-context-protocol",
|
||||
),
|
||||
],
|
||||
}
|
||||
name = "meta-reference-gpu"
|
||||
|
|
|
@ -24,7 +24,6 @@ providers:
|
|||
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
- provider_id: meta-reference-safety
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
|
@ -88,10 +87,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -109,10 +106,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
|
|
|
@ -24,7 +24,6 @@ providers:
|
|||
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
@ -78,10 +77,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -99,10 +96,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/registry.db
|
||||
|
|
|
@ -3,27 +3,39 @@ distribution_spec:
|
|||
description: Use NVIDIA NIM for running LLM inference, evaluation and safety
|
||||
providers:
|
||||
inference:
|
||||
- remote::nvidia
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
safety:
|
||||
- remote::nvidia
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
eval:
|
||||
- remote::nvidia
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
post_training:
|
||||
- remote::nvidia
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
datasetio:
|
||||
- inline::localfs
|
||||
- remote::nvidia
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
scoring:
|
||||
- inline::basic
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
tool_runtime:
|
||||
- inline::rag-runtime
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
image_type: conda
|
||||
image_name: nvidia
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -17,16 +17,65 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::nvidia"],
|
||||
"vector_io": ["inline::faiss"],
|
||||
"safety": ["remote::nvidia"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["remote::nvidia"],
|
||||
"post_training": ["remote::nvidia"],
|
||||
"datasetio": ["inline::localfs", "remote::nvidia"],
|
||||
"scoring": ["inline::basic"],
|
||||
"tool_runtime": ["inline::rag-runtime"],
|
||||
"inference": [
|
||||
Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
)
|
||||
],
|
||||
"vector_io": [
|
||||
Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
)
|
||||
],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
)
|
||||
],
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"telemetry": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"eval": [
|
||||
Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
)
|
||||
],
|
||||
"post_training": [Provider(provider_id="nvidia", provider_type="remote::nvidia", config={})],
|
||||
"datasetio": [
|
||||
Provider(
|
||||
provider_id="localfs",
|
||||
provider_type="inline::localfs",
|
||||
),
|
||||
Provider(
|
||||
provider_id="nvidia",
|
||||
provider_type="remote::nvidia",
|
||||
),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(
|
||||
provider_id="basic",
|
||||
provider_type="inline::basic",
|
||||
)
|
||||
],
|
||||
"tool_runtime": [
|
||||
Provider(
|
||||
provider_id="rag-runtime",
|
||||
provider_type="inline::rag-runtime",
|
||||
)
|
||||
],
|
||||
}
|
||||
|
||||
inference_provider = Provider(
|
||||
|
|
|
@ -85,11 +85,9 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
tool_runtime:
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
|
||||
|
|
|
@ -74,11 +74,9 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
tool_runtime:
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/registry.db
|
||||
|
|
|
@ -3,36 +3,58 @@ distribution_spec:
|
|||
description: Distribution for running open benchmarks
|
||||
providers:
|
||||
inference:
|
||||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::groq
|
||||
- remote::together
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
- provider_id: anthropic
|
||||
provider_type: remote::anthropic
|
||||
- provider_id: gemini
|
||||
provider_type: remote::gemini
|
||||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
vector_io:
|
||||
- inline::sqlite-vec
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
- provider_id: sqlite-vec
|
||||
provider_type: inline::sqlite-vec
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
- provider_id: pgvector
|
||||
provider_type: remote::pgvector
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: open-benchmark
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
|
|
|
@ -96,19 +96,33 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
|
|||
def get_distribution_template() -> DistributionTemplate:
|
||||
inference_providers, available_models = get_inference_providers()
|
||||
providers = {
|
||||
"inference": [p.provider_type for p in inference_providers],
|
||||
"vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"inference": inference_providers,
|
||||
"vector_io": [
|
||||
Provider(provider_id="sqlite-vec", provider_type="inline::sqlite-vec"),
|
||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
||||
Provider(provider_id="pgvector", provider_type="remote::pgvector"),
|
||||
],
|
||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"datasetio": [
|
||||
Provider(provider_id="huggingface", provider_type="remote::huggingface"),
|
||||
Provider(provider_id="localfs", provider_type="inline::localfs"),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(provider_id="basic", provider_type="inline::basic"),
|
||||
Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"),
|
||||
Provider(provider_id="braintrust", provider_type="inline::braintrust"),
|
||||
],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
||||
Provider(
|
||||
provider_id="model-context-protocol",
|
||||
provider_type="remote::model-context-protocol",
|
||||
),
|
||||
],
|
||||
}
|
||||
name = "open-benchmark"
|
||||
|
|
|
@ -106,10 +106,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -127,10 +125,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/registry.db
|
||||
|
|
|
@ -3,22 +3,33 @@ distribution_spec:
|
|||
description: Quick start template for running Llama Stack with several popular providers
|
||||
providers:
|
||||
inference:
|
||||
- remote::vllm
|
||||
- inline::sentence-transformers
|
||||
- provider_id: vllm-inference
|
||||
provider_type: remote::vllm
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- remote::chromadb
|
||||
- provider_id: chromadb
|
||||
provider_type: remote::chromadb
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: postgres-demo
|
||||
additional_pip_packages:
|
||||
- asyncpg
|
||||
- psycopg2-binary
|
||||
|
|
|
@ -34,16 +34,24 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
]
|
||||
providers = {
|
||||
"inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
|
||||
"vector_io": ["remote::chromadb"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"inference": inference_providers
|
||||
+ [
|
||||
Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
|
||||
],
|
||||
"vector_io": [
|
||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
||||
],
|
||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
||||
Provider(
|
||||
provider_id="model-context-protocol",
|
||||
provider_type="remote::model-context-protocol",
|
||||
),
|
||||
],
|
||||
}
|
||||
name = "postgres-demo"
|
||||
|
|
|
@ -18,7 +18,6 @@ providers:
|
|||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
|
||||
provider_type: remote::chromadb
|
||||
|
@ -70,10 +69,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: postgres
|
||||
host: ${env.POSTGRES_HOST:=localhost}
|
||||
|
|
|
@ -3,57 +3,98 @@ distribution_spec:
|
|||
description: Quick start template for running Llama Stack with several popular providers
|
||||
providers:
|
||||
inference:
|
||||
- remote::cerebras
|
||||
- remote::ollama
|
||||
- remote::vllm
|
||||
- remote::tgi
|
||||
- remote::hf::serverless
|
||||
- remote::hf::endpoint
|
||||
- remote::fireworks
|
||||
- remote::together
|
||||
- remote::bedrock
|
||||
- remote::databricks
|
||||
- remote::nvidia
|
||||
- remote::runpod
|
||||
- remote::openai
|
||||
- remote::anthropic
|
||||
- remote::gemini
|
||||
- remote::groq
|
||||
- remote::llama-openai-compat
|
||||
- remote::sambanova
|
||||
- remote::passthrough
|
||||
- inline::sentence-transformers
|
||||
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
|
||||
provider_type: remote::cerebras
|
||||
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
|
||||
provider_type: remote::ollama
|
||||
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
|
||||
provider_type: remote::vllm
|
||||
- provider_id: ${env.ENABLE_TGI:=__disabled__}
|
||||
provider_type: remote::tgi
|
||||
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
|
||||
provider_type: remote::hf::serverless
|
||||
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
|
||||
provider_type: remote::hf::endpoint
|
||||
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
|
||||
provider_type: remote::fireworks
|
||||
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
|
||||
provider_type: remote::together
|
||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
||||
provider_type: remote::bedrock
|
||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
||||
provider_type: remote::databricks
|
||||
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
|
||||
provider_type: remote::nvidia
|
||||
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
|
||||
provider_type: remote::runpod
|
||||
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
|
||||
provider_type: remote::openai
|
||||
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
|
||||
provider_type: remote::anthropic
|
||||
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
||||
provider_type: remote::gemini
|
||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
||||
provider_type: remote::groq
|
||||
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
|
||||
provider_type: remote::llama-openai-compat
|
||||
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
|
||||
provider_type: remote::sambanova
|
||||
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
|
||||
provider_type: remote::passthrough
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- inline::sqlite-vec
|
||||
- inline::milvus
|
||||
- remote::chromadb
|
||||
- remote::pgvector
|
||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
||||
provider_type: inline::faiss
|
||||
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
|
||||
provider_type: inline::sqlite-vec
|
||||
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
|
||||
provider_type: inline::milvus
|
||||
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
|
||||
provider_type: remote::chromadb
|
||||
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
|
||||
provider_type: remote::pgvector
|
||||
files:
|
||||
- inline::localfs
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
post_training:
|
||||
- inline::huggingface
|
||||
- provider_id: huggingface
|
||||
provider_type: inline::huggingface
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: starter
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- asyncpg
|
||||
|
|
|
@ -56,7 +56,6 @@ providers:
|
|||
api_key: ${env.TOGETHER_API_KEY}
|
||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
||||
provider_type: remote::bedrock
|
||||
config: {}
|
||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
||||
provider_type: remote::databricks
|
||||
config:
|
||||
|
@ -107,7 +106,6 @@ providers:
|
|||
api_key: ${env.PASSTHROUGH_API_KEY}
|
||||
- provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
||||
provider_type: inline::faiss
|
||||
|
@ -208,10 +206,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -229,10 +225,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/registry.db
|
||||
|
|
|
@ -253,21 +253,91 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
]
|
||||
|
||||
providers = {
|
||||
"inference": ([p.provider_type for p in remote_inference_providers] + ["inline::sentence-transformers"]),
|
||||
"vector_io": ([p.provider_type for p in vector_io_providers]),
|
||||
"files": ["inline::localfs"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"post_training": ["inline::huggingface"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"inference": remote_inference_providers
|
||||
+ [
|
||||
Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
)
|
||||
],
|
||||
"vector_io": vector_io_providers,
|
||||
"files": [
|
||||
Provider(
|
||||
provider_id="localfs",
|
||||
provider_type="inline::localfs",
|
||||
)
|
||||
],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
provider_type="inline::llama-guard",
|
||||
)
|
||||
],
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"telemetry": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"post_training": [
|
||||
Provider(
|
||||
provider_id="huggingface",
|
||||
provider_type="inline::huggingface",
|
||||
)
|
||||
],
|
||||
"eval": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"datasetio": [
|
||||
Provider(
|
||||
provider_id="huggingface",
|
||||
provider_type="remote::huggingface",
|
||||
),
|
||||
Provider(
|
||||
provider_id="localfs",
|
||||
provider_type="inline::localfs",
|
||||
),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(
|
||||
provider_id="basic",
|
||||
provider_type="inline::basic",
|
||||
),
|
||||
Provider(
|
||||
provider_id="llm-as-judge",
|
||||
provider_type="inline::llm-as-judge",
|
||||
),
|
||||
Provider(
|
||||
provider_id="braintrust",
|
||||
provider_type="inline::braintrust",
|
||||
),
|
||||
],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
Provider(
|
||||
provider_id="brave-search",
|
||||
provider_type="remote::brave-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="tavily-search",
|
||||
provider_type="remote::tavily-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="rag-runtime",
|
||||
provider_type="inline::rag-runtime",
|
||||
),
|
||||
Provider(
|
||||
provider_id="model-context-protocol",
|
||||
provider_type="remote::model-context-protocol",
|
||||
),
|
||||
],
|
||||
}
|
||||
files_provider = Provider(
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
from typing import Any, Literal
|
||||
|
||||
import jinja2
|
||||
import rich
|
||||
|
@ -35,6 +35,51 @@ from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
|
|||
from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
|
||||
|
||||
|
||||
def filter_empty_values(obj: Any) -> Any:
    """Recursively strip a few specific "empty" entries from nested dicts/lists.

    The following are removed:
    - a ``module`` key whose value is the empty string ``""``
    - a ``config`` key whose value is an empty dict ``{}``
    - a ``container_image`` key whose value is falsy (``None``, ``""``, ...)
    - ``None`` items inside lists (checked after the item itself is filtered)

    Everything else is preserved, including ``None`` values stored under any
    other dict key and empty strings/dicts under any other key.

    Args:
        obj: Any value; dicts and lists are walked recursively, all other
            types are returned unchanged.

    Returns:
        A new dict/list with the entries above removed, or ``obj`` itself for
        non-container values (``None`` stays ``None``).
    """
    if obj is None:
        return None

    if isinstance(obj, dict):
        filtered = {}
        for key, value in obj.items():
            # Only these three keys are ever dropped, and only when "empty".
            if key == "module" and isinstance(value, str) and value == "":
                continue
            if key == "config" and isinstance(value, dict) and not value:
                continue
            if key == "container_image" and not value:
                continue
            # Any other key is kept even when its filtered value is None or empty.
            filtered[key] = filter_empty_values(value)
        return filtered

    if isinstance(obj, list):
        # Unlike dict values, list items that are (or filter down to) None are dropped.
        filtered_items = (filter_empty_values(item) for item in obj)
        return [item for item in filtered_items if item is not None]

    # Scalars and any other type pass through untouched.
    return obj
|
||||
|
||||
|
||||
def get_model_registry(
|
||||
available_models: dict[str, list[ProviderModelEntry]],
|
||||
) -> tuple[list[ModelInput], bool]:
|
||||
|
@ -138,31 +183,26 @@ class RunConfigSettings(BaseModel):
|
|||
def run_config(
|
||||
self,
|
||||
name: str,
|
||||
providers: dict[str, list[str]],
|
||||
providers: dict[str, list[Provider]],
|
||||
container_image: str | None = None,
|
||||
) -> dict:
|
||||
provider_registry = get_provider_registry()
|
||||
|
||||
provider_configs = {}
|
||||
for api_str, provider_types in providers.items():
|
||||
for api_str, provider_objs in providers.items():
|
||||
if api_providers := self.provider_overrides.get(api_str):
|
||||
# Convert Provider objects to dicts for YAML serialization
|
||||
provider_configs[api_str] = [
|
||||
p.model_dump(exclude_none=True) if isinstance(p, Provider) else p for p in api_providers
|
||||
]
|
||||
provider_configs[api_str] = [p.model_dump(exclude_none=True) for p in api_providers]
|
||||
continue
|
||||
|
||||
provider_configs[api_str] = []
|
||||
for provider_type in provider_types:
|
||||
provider_id = provider_type.split("::")[-1]
|
||||
|
||||
for provider in provider_objs:
|
||||
api = Api(api_str)
|
||||
if provider_type not in provider_registry[api]:
|
||||
raise ValueError(f"Unknown provider type: {provider_type} for API: {api_str}")
|
||||
if provider.provider_type not in provider_registry[api]:
|
||||
raise ValueError(f"Unknown provider type: {provider.provider_type} for API: {api_str}")
|
||||
|
||||
config_class = provider_registry[api][provider_type].config_class
|
||||
config_class = provider_registry[api][provider.provider_type].config_class
|
||||
assert config_class is not None, (
|
||||
f"No config class for provider type: {provider_type} for API: {api_str}"
|
||||
f"No config class for provider type: {provider.provider_type} for API: {api_str}"
|
||||
)
|
||||
|
||||
config_class = instantiate_class_type(config_class)
|
||||
|
@ -171,14 +211,9 @@ class RunConfigSettings(BaseModel):
|
|||
else:
|
||||
config = {}
|
||||
|
||||
provider_configs[api_str].append(
|
||||
Provider(
|
||||
provider_id=provider_id,
|
||||
provider_type=provider_type,
|
||||
config=config,
|
||||
).model_dump(exclude_none=True)
|
||||
)
|
||||
|
||||
provider.config = config
|
||||
# Convert Provider object to dict for YAML serialization
|
||||
provider_configs[api_str].append(provider.model_dump(exclude_none=True))
|
||||
# Get unique set of APIs from providers
|
||||
apis = sorted(providers.keys())
|
||||
|
||||
|
@ -222,7 +257,7 @@ class DistributionTemplate(BaseModel):
|
|||
description: str
|
||||
distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]
|
||||
|
||||
providers: dict[str, list[str]]
|
||||
providers: dict[str, list[Provider]]
|
||||
run_configs: dict[str, RunConfigSettings]
|
||||
template_path: Path | None = None
|
||||
|
||||
|
@ -255,13 +290,28 @@ class DistributionTemplate(BaseModel):
|
|||
if self.additional_pip_packages:
|
||||
additional_pip_packages.extend(self.additional_pip_packages)
|
||||
|
||||
# Create minimal providers for build config (without runtime configs)
|
||||
build_providers = {}
|
||||
for api, providers in self.providers.items():
|
||||
build_providers[api] = []
|
||||
for provider in providers:
|
||||
# Create a minimal provider object with only essential build information
|
||||
build_provider = Provider(
|
||||
provider_id=provider.provider_id,
|
||||
provider_type=provider.provider_type,
|
||||
config={}, # Empty config for build
|
||||
module=provider.module,
|
||||
)
|
||||
build_providers[api].append(build_provider)
|
||||
|
||||
return BuildConfig(
|
||||
distribution_spec=DistributionSpec(
|
||||
description=self.description,
|
||||
container_image=self.container_image,
|
||||
providers=self.providers,
|
||||
providers=build_providers,
|
||||
),
|
||||
image_type="conda", # default to conda, can be overridden
|
||||
image_type="conda",
|
||||
image_name=self.name,
|
||||
additional_pip_packages=sorted(set(additional_pip_packages)),
|
||||
)
|
||||
|
||||
|
@ -270,7 +320,7 @@ class DistributionTemplate(BaseModel):
|
|||
providers_table += "|-----|-------------|\n"
|
||||
|
||||
for api, providers in sorted(self.providers.items()):
|
||||
providers_str = ", ".join(f"`{p}`" for p in providers)
|
||||
providers_str = ", ".join(f"`{p.provider_type}`" for p in providers)
|
||||
providers_table += f"| {api} | {providers_str} |\n"
|
||||
|
||||
template = self.template_path.read_text()
|
||||
|
@ -334,7 +384,7 @@ class DistributionTemplate(BaseModel):
|
|||
build_config = self.build_config()
|
||||
with open(yaml_output_dir / "build.yaml", "w") as f:
|
||||
yaml.safe_dump(
|
||||
build_config.model_dump(exclude_none=True),
|
||||
filter_empty_values(build_config.model_dump(exclude_none=True)),
|
||||
f,
|
||||
sort_keys=False,
|
||||
)
|
||||
|
@ -343,7 +393,7 @@ class DistributionTemplate(BaseModel):
|
|||
run_config = settings.run_config(self.name, self.providers, self.container_image)
|
||||
with open(yaml_output_dir / yaml_pth, "w") as f:
|
||||
yaml.safe_dump(
|
||||
{k: v for k, v in run_config.items() if v is not None},
|
||||
filter_empty_values(run_config),
|
||||
f,
|
||||
sort_keys=False,
|
||||
)
|
||||
|
|
|
@ -3,31 +3,49 @@ distribution_spec:
|
|||
description: Use watsonx for running LLM inference
|
||||
providers:
|
||||
inference:
|
||||
- remote::watsonx
|
||||
- inline::sentence-transformers
|
||||
- provider_id: watsonx
|
||||
provider_type: remote::watsonx
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- inline::faiss
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
safety:
|
||||
- inline::llama-guard
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
agents:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
telemetry:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
eval:
|
||||
- inline::meta-reference
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
datasetio:
|
||||
- remote::huggingface
|
||||
- inline::localfs
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
scoring:
|
||||
- inline::basic
|
||||
- inline::llm-as-judge
|
||||
- inline::braintrust
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
tool_runtime:
|
||||
- remote::brave-search
|
||||
- remote::tavily-search
|
||||
- inline::rag-runtime
|
||||
- remote::model-context-protocol
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
image_type: conda
|
||||
image_name: watsonx
|
||||
additional_pip_packages:
|
||||
- aiosqlite
|
||||
- sqlalchemy[asyncio]
|
||||
- aiosqlite
|
||||
- aiosqlite
|
||||
|
|
|
@ -20,7 +20,6 @@ providers:
|
|||
project_id: ${env.WATSONX_PROJECT_ID:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
@ -74,10 +73,8 @@ providers:
|
|||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
|
@ -95,10 +92,8 @@ providers:
|
|||
max_results: 3
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/registry.db
|
||||
|
|
|
@ -18,19 +18,87 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
|||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
providers = {
|
||||
"inference": ["remote::watsonx", "inline::sentence-transformers"],
|
||||
"vector_io": ["inline::faiss"],
|
||||
"safety": ["inline::llama-guard"],
|
||||
"agents": ["inline::meta-reference"],
|
||||
"telemetry": ["inline::meta-reference"],
|
||||
"eval": ["inline::meta-reference"],
|
||||
"datasetio": ["remote::huggingface", "inline::localfs"],
|
||||
"scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
|
||||
"inference": [
|
||||
Provider(
|
||||
provider_id="watsonx",
|
||||
provider_type="remote::watsonx",
|
||||
),
|
||||
Provider(
|
||||
provider_id="sentence-transformers",
|
||||
provider_type="inline::sentence-transformers",
|
||||
),
|
||||
],
|
||||
"vector_io": [
|
||||
Provider(
|
||||
provider_id="faiss",
|
||||
provider_type="inline::faiss",
|
||||
)
|
||||
],
|
||||
"safety": [
|
||||
Provider(
|
||||
provider_id="llama-guard",
|
||||
provider_type="inline::llama-guard",
|
||||
)
|
||||
],
|
||||
"agents": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"telemetry": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"eval": [
|
||||
Provider(
|
||||
provider_id="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
)
|
||||
],
|
||||
"datasetio": [
|
||||
Provider(
|
||||
provider_id="huggingface",
|
||||
provider_type="remote::huggingface",
|
||||
),
|
||||
Provider(
|
||||
provider_id="localfs",
|
||||
provider_type="inline::localfs",
|
||||
),
|
||||
],
|
||||
"scoring": [
|
||||
Provider(
|
||||
provider_id="basic",
|
||||
provider_type="inline::basic",
|
||||
),
|
||||
Provider(
|
||||
provider_id="llm-as-judge",
|
||||
provider_type="inline::llm-as-judge",
|
||||
),
|
||||
Provider(
|
||||
provider_id="braintrust",
|
||||
provider_type="inline::braintrust",
|
||||
),
|
||||
],
|
||||
"tool_runtime": [
|
||||
"remote::brave-search",
|
||||
"remote::tavily-search",
|
||||
"inline::rag-runtime",
|
||||
"remote::model-context-protocol",
|
||||
Provider(
|
||||
provider_id="brave-search",
|
||||
provider_type="remote::brave-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="tavily-search",
|
||||
provider_type="remote::tavily-search",
|
||||
),
|
||||
Provider(
|
||||
provider_id="rag-runtime",
|
||||
provider_type="inline::rag-runtime",
|
||||
),
|
||||
Provider(
|
||||
provider_id="model-context-protocol",
|
||||
provider_type="remote::model-context-protocol",
|
||||
),
|
||||
],
|
||||
}
|
||||
|
||||
|
|
3
tests/external/build.yaml
vendored
3
tests/external/build.yaml
vendored
|
@ -3,7 +3,8 @@ distribution_spec:
|
|||
description: Custom distro for CI tests
|
||||
providers:
|
||||
weather:
|
||||
- remote::kaze
|
||||
- provider_id: kaze
|
||||
provider_type: remote::kaze
|
||||
image_type: venv
|
||||
image_name: ci-test
|
||||
external_providers_dir: ~/.llama/providers.d
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue