mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-27 06:28:50 +00:00
fix: separate build and run provider types (#2917)
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 5s
Test Llama Stack Build / generate-matrix (push) Successful in 4s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 5s
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / build-single-provider (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 2s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 6s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 5s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 5s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 9s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 6s
Test External API and Providers / test-external (venv) (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 5s
Test Llama Stack Build / build (push) Failing after 3s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Integration Tests / test-matrix (push) Failing after 7s
Pre-commit / pre-commit (push) Successful in 1m13s
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / discover-tests (push) Successful in 3s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 5s
Test Llama Stack Build / generate-matrix (push) Successful in 4s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 5s
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / build-single-provider (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 2s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 5s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 6s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 3s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 5s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 4s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 5s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 9s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Failing after 6s
Test External API and Providers / test-external (venv) (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 4s
Unit Tests / unit-tests (3.13) (push) Failing after 5s
Test Llama Stack Build / build (push) Failing after 3s
Unit Tests / unit-tests (3.12) (push) Failing after 5s
Integration Tests / test-matrix (push) Failing after 7s
Pre-commit / pre-commit (push) Successful in 1m13s
# What does this PR do?

In #2637, I combined the run and build config provider types so that both use `Provider`. Since `Provider` includes a `provider_id`, a user must now specify one when writing a build YAML. This is not very clear, because all a user should care about at build time is the code to be installed (the `module` and the `provider_type`).

Introduce `BuildProvider` and fix up the parts of the code impacted by this.

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
parent
025163d8e6
commit
3344d8a9e5
19 changed files with 401 additions and 754 deletions
|
@ -31,6 +31,7 @@ from llama_stack.distribution.build import (
|
||||||
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
|
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
BuildConfig,
|
BuildConfig,
|
||||||
|
BuildProvider,
|
||||||
DistributionSpec,
|
DistributionSpec,
|
||||||
Provider,
|
Provider,
|
||||||
StackRunConfig,
|
StackRunConfig,
|
||||||
|
@ -94,7 +95,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
elif args.providers:
|
elif args.providers:
|
||||||
provider_list: dict[str, list[Provider]] = dict()
|
provider_list: dict[str, list[BuildProvider]] = dict()
|
||||||
for api_provider in args.providers.split(","):
|
for api_provider in args.providers.split(","):
|
||||||
if "=" not in api_provider:
|
if "=" not in api_provider:
|
||||||
cprint(
|
cprint(
|
||||||
|
@ -113,10 +114,8 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
if provider_type in providers_for_api:
|
if provider_type in providers_for_api:
|
||||||
provider = Provider(
|
provider = BuildProvider(
|
||||||
provider_type=provider_type,
|
provider_type=provider_type,
|
||||||
provider_id=provider_type.split("::")[1],
|
|
||||||
config={},
|
|
||||||
module=None,
|
module=None,
|
||||||
)
|
)
|
||||||
provider_list.setdefault(api, []).append(provider)
|
provider_list.setdefault(api, []).append(provider)
|
||||||
|
@ -189,7 +188,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||||
|
|
||||||
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
|
cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)
|
||||||
|
|
||||||
providers: dict[str, list[Provider]] = dict()
|
providers: dict[str, list[BuildProvider]] = dict()
|
||||||
for api, providers_for_api in get_provider_registry().items():
|
for api, providers_for_api in get_provider_registry().items():
|
||||||
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
|
available_providers = [x for x in providers_for_api.keys() if x not in ("remote", "remote::sample")]
|
||||||
if not available_providers:
|
if not available_providers:
|
||||||
|
@ -204,7 +203,10 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
providers[api.value] = api_provider
|
string_providers = api_provider.split(" ")
|
||||||
|
|
||||||
|
for provider in string_providers:
|
||||||
|
providers.setdefault(api.value, []).append(BuildProvider(provider_type=provider))
|
||||||
|
|
||||||
description = prompt(
|
description = prompt(
|
||||||
"\n > (Optional) Enter a short description for your Llama Stack: ",
|
"\n > (Optional) Enter a short description for your Llama Stack: ",
|
||||||
|
@ -307,7 +309,7 @@ def _generate_run_config(
|
||||||
providers = build_config.distribution_spec.providers[api]
|
providers = build_config.distribution_spec.providers[api]
|
||||||
|
|
||||||
for provider in providers:
|
for provider in providers:
|
||||||
pid = provider.provider_id
|
pid = provider.provider_type.split("::")[-1]
|
||||||
|
|
||||||
p = provider_registry[Api(api)][provider.provider_type]
|
p = provider_registry[Api(api)][provider.provider_type]
|
||||||
if p.deprecation_error:
|
if p.deprecation_error:
|
||||||
|
|
|
@ -100,11 +100,12 @@ def configure_api_providers(config: StackRunConfig, build_spec: DistributionSpec
|
||||||
break
|
break
|
||||||
|
|
||||||
logger.info(f"> Configuring provider `({provider.provider_type})`")
|
logger.info(f"> Configuring provider `({provider.provider_type})`")
|
||||||
|
pid = provider.provider_type.split("::")[-1]
|
||||||
updated_providers.append(
|
updated_providers.append(
|
||||||
configure_single_provider(
|
configure_single_provider(
|
||||||
provider_registry[api],
|
provider_registry[api],
|
||||||
Provider(
|
Provider(
|
||||||
provider_id=(f"{provider.provider_id}-{i:02d}" if len(plist) > 1 else provider.provider_id),
|
provider_id=(f"{pid}-{i:02d}" if len(plist) > 1 else pid),
|
||||||
provider_type=provider.provider_type,
|
provider_type=provider.provider_type,
|
||||||
config={},
|
config={},
|
||||||
),
|
),
|
||||||
|
|
|
@ -154,13 +154,27 @@ class Provider(BaseModel):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BuildProvider(BaseModel):
|
||||||
|
provider_type: str
|
||||||
|
module: str | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="""
|
||||||
|
Fully-qualified name of the external provider module to import. The module is expected to have:
|
||||||
|
|
||||||
|
- `get_adapter_impl(config, deps)`: returns the adapter implementation
|
||||||
|
|
||||||
|
Example: `module: ramalama_stack`
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DistributionSpec(BaseModel):
|
class DistributionSpec(BaseModel):
|
||||||
description: str | None = Field(
|
description: str | None = Field(
|
||||||
default="",
|
default="",
|
||||||
description="Description of the distribution",
|
description="Description of the distribution",
|
||||||
)
|
)
|
||||||
container_image: str | None = None
|
container_image: str | None = None
|
||||||
providers: dict[str, list[Provider]] = Field(
|
providers: dict[str, list[BuildProvider]] = Field(
|
||||||
default_factory=dict,
|
default_factory=dict,
|
||||||
description="""
|
description="""
|
||||||
Provider Types for each of the APIs provided by this distribution. If you
|
Provider Types for each of the APIs provided by this distribution. If you
|
||||||
|
|
|
@ -33,7 +33,7 @@ from termcolor import cprint
|
||||||
|
|
||||||
from llama_stack.distribution.build import print_pip_install_help
|
from llama_stack.distribution.build import print_pip_install_help
|
||||||
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
|
from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
|
||||||
from llama_stack.distribution.datatypes import Api, BuildConfig, DistributionSpec
|
from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
|
||||||
from llama_stack.distribution.request_headers import (
|
from llama_stack.distribution.request_headers import (
|
||||||
PROVIDER_DATA_VAR,
|
PROVIDER_DATA_VAR,
|
||||||
request_provider_data_context,
|
request_provider_data_context,
|
||||||
|
@ -249,9 +249,16 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
if self.config_path_or_template_name.endswith(".yaml"):
|
if self.config_path_or_template_name.endswith(".yaml"):
|
||||||
|
providers: dict[str, list[BuildProvider]] = {}
|
||||||
|
for api, run_providers in self.config.providers.items():
|
||||||
|
for provider in run_providers:
|
||||||
|
providers.setdefault(api, []).append(
|
||||||
|
BuildProvider(provider_type=provider.provider_type, module=provider.module)
|
||||||
|
)
|
||||||
|
providers = dict(providers)
|
||||||
build_config = BuildConfig(
|
build_config = BuildConfig(
|
||||||
distribution_spec=DistributionSpec(
|
distribution_spec=DistributionSpec(
|
||||||
providers=self.config.providers,
|
providers=providers,
|
||||||
),
|
),
|
||||||
external_providers_dir=self.config.external_providers_dir,
|
external_providers_dir=self.config.external_providers_dir,
|
||||||
)
|
)
|
||||||
|
|
|
@ -3,96 +3,56 @@ distribution_spec:
|
||||||
description: CI tests for Llama Stack
|
description: CI tests for Llama Stack
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
|
- provider_type: remote::cerebras
|
||||||
provider_type: remote::cerebras
|
- provider_type: remote::ollama
|
||||||
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
|
- provider_type: remote::vllm
|
||||||
provider_type: remote::ollama
|
- provider_type: remote::tgi
|
||||||
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
|
- provider_type: remote::hf::serverless
|
||||||
provider_type: remote::vllm
|
- provider_type: remote::hf::endpoint
|
||||||
- provider_id: ${env.ENABLE_TGI:=__disabled__}
|
- provider_type: remote::fireworks
|
||||||
provider_type: remote::tgi
|
- provider_type: remote::together
|
||||||
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
|
- provider_type: remote::bedrock
|
||||||
provider_type: remote::hf::serverless
|
- provider_type: remote::databricks
|
||||||
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::hf::endpoint
|
- provider_type: remote::runpod
|
||||||
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
|
- provider_type: remote::openai
|
||||||
provider_type: remote::fireworks
|
- provider_type: remote::anthropic
|
||||||
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
|
- provider_type: remote::gemini
|
||||||
provider_type: remote::together
|
- provider_type: remote::groq
|
||||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
- provider_type: remote::llama-openai-compat
|
||||||
provider_type: remote::bedrock
|
- provider_type: remote::sambanova
|
||||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
- provider_type: remote::passthrough
|
||||||
provider_type: remote::databricks
|
- provider_type: inline::sentence-transformers
|
||||||
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
|
|
||||||
provider_type: remote::nvidia
|
|
||||||
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
|
|
||||||
provider_type: remote::runpod
|
|
||||||
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
|
|
||||||
provider_type: remote::openai
|
|
||||||
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
|
|
||||||
provider_type: remote::anthropic
|
|
||||||
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
|
||||||
provider_type: remote::gemini
|
|
||||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
|
||||||
provider_type: remote::groq
|
|
||||||
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
|
|
||||||
provider_type: remote::llama-openai-compat
|
|
||||||
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
|
|
||||||
provider_type: remote::sambanova
|
|
||||||
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
|
|
||||||
provider_type: remote::passthrough
|
|
||||||
- provider_id: sentence-transformers
|
|
||||||
provider_type: inline::sentence-transformers
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
- provider_type: inline::faiss
|
||||||
provider_type: inline::faiss
|
- provider_type: inline::sqlite-vec
|
||||||
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
|
- provider_type: inline::milvus
|
||||||
provider_type: inline::sqlite-vec
|
- provider_type: remote::chromadb
|
||||||
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
|
- provider_type: remote::pgvector
|
||||||
provider_type: inline::milvus
|
|
||||||
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
|
|
||||||
provider_type: remote::chromadb
|
|
||||||
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
|
|
||||||
provider_type: remote::pgvector
|
|
||||||
files:
|
files:
|
||||||
- provider_id: localfs
|
- provider_type: inline::localfs
|
||||||
provider_type: inline::localfs
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
post_training:
|
post_training:
|
||||||
- provider_id: huggingface
|
- provider_type: inline::huggingface
|
||||||
provider_type: inline::huggingface
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: huggingface
|
- provider_type: remote::huggingface
|
||||||
provider_type: remote::huggingface
|
- provider_type: inline::localfs
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
- provider_type: inline::llm-as-judge
|
||||||
- provider_id: llm-as-judge
|
- provider_type: inline::braintrust
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
- provider_type: remote::model-context-protocol
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: ci-tests
|
image_name: ci-tests
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -4,48 +4,31 @@ distribution_spec:
|
||||||
container
|
container
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: tgi
|
- provider_type: remote::tgi
|
||||||
provider_type: remote::tgi
|
- provider_type: inline::sentence-transformers
|
||||||
- provider_id: sentence-transformers
|
|
||||||
provider_type: inline::sentence-transformers
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_type: inline::faiss
|
||||||
provider_type: inline::faiss
|
- provider_type: remote::chromadb
|
||||||
- provider_id: chromadb
|
- provider_type: remote::pgvector
|
||||||
provider_type: remote::chromadb
|
|
||||||
- provider_id: pgvector
|
|
||||||
provider_type: remote::pgvector
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: huggingface
|
- provider_type: remote::huggingface
|
||||||
provider_type: remote::huggingface
|
- provider_type: inline::localfs
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
- provider_type: inline::llm-as-judge
|
||||||
- provider_id: llm-as-judge
|
- provider_type: inline::braintrust
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: dell
|
image_name: dell
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
|
BuildProvider,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
Provider,
|
Provider,
|
||||||
ShieldInput,
|
ShieldInput,
|
||||||
|
@ -20,31 +21,31 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": [
|
"inference": [
|
||||||
Provider(provider_id="tgi", provider_type="remote::tgi"),
|
BuildProvider(provider_type="remote::tgi"),
|
||||||
Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
|
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||||
],
|
],
|
||||||
"vector_io": [
|
"vector_io": [
|
||||||
Provider(provider_id="faiss", provider_type="inline::faiss"),
|
BuildProvider(provider_type="inline::faiss"),
|
||||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
BuildProvider(provider_type="remote::chromadb"),
|
||||||
Provider(provider_id="pgvector", provider_type="remote::pgvector"),
|
BuildProvider(provider_type="remote::pgvector"),
|
||||||
],
|
],
|
||||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(provider_id="huggingface", provider_type="remote::huggingface"),
|
BuildProvider(provider_type="remote::huggingface"),
|
||||||
Provider(provider_id="localfs", provider_type="inline::localfs"),
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
],
|
],
|
||||||
"scoring": [
|
"scoring": [
|
||||||
Provider(provider_id="basic", provider_type="inline::basic"),
|
BuildProvider(provider_type="inline::basic"),
|
||||||
Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"),
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||||
Provider(provider_id="braintrust", provider_type="inline::braintrust"),
|
BuildProvider(provider_type="inline::braintrust"),
|
||||||
],
|
],
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
name = "dell"
|
name = "dell"
|
||||||
|
|
|
@ -3,48 +3,31 @@ distribution_spec:
|
||||||
description: Use Meta Reference for running LLM inference
|
description: Use Meta Reference for running LLM inference
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_type: inline::faiss
|
||||||
provider_type: inline::faiss
|
- provider_type: remote::chromadb
|
||||||
- provider_id: chromadb
|
- provider_type: remote::pgvector
|
||||||
provider_type: remote::chromadb
|
|
||||||
- provider_id: pgvector
|
|
||||||
provider_type: remote::pgvector
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: huggingface
|
- provider_type: remote::huggingface
|
||||||
provider_type: remote::huggingface
|
- provider_type: inline::localfs
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
- provider_type: inline::llm-as-judge
|
||||||
- provider_id: llm-as-judge
|
- provider_type: inline::braintrust
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
- provider_type: remote::model-context-protocol
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: meta-reference-gpu
|
image_name: meta-reference-gpu
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -8,6 +8,7 @@ from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
|
BuildProvider,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
Provider,
|
Provider,
|
||||||
ShieldInput,
|
ShieldInput,
|
||||||
|
@ -25,91 +26,30 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": [
|
"inference": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"vector_io": [
|
"vector_io": [
|
||||||
Provider(
|
BuildProvider(provider_type="inline::faiss"),
|
||||||
provider_id="faiss",
|
BuildProvider(provider_type="remote::chromadb"),
|
||||||
provider_type="inline::faiss",
|
BuildProvider(provider_type="remote::pgvector"),
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="chromadb",
|
|
||||||
provider_type="remote::chromadb",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="pgvector",
|
|
||||||
provider_type="remote::pgvector",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
"safety": [
|
|
||||||
Provider(
|
|
||||||
provider_id="llama-guard",
|
|
||||||
provider_type="inline::llama-guard",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"agents": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"telemetry": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"eval": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
],
|
||||||
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::huggingface"),
|
||||||
provider_id="huggingface",
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
provider_type="remote::huggingface",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="localfs",
|
|
||||||
provider_type="inline::localfs",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"scoring": [
|
"scoring": [
|
||||||
Provider(
|
BuildProvider(provider_type="inline::basic"),
|
||||||
provider_id="basic",
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||||
provider_type="inline::basic",
|
BuildProvider(provider_type="inline::braintrust"),
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="llm-as-judge",
|
|
||||||
provider_type="inline::llm-as-judge",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="braintrust",
|
|
||||||
provider_type="inline::braintrust",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
provider_id="brave-search",
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
provider_type="remote::brave-search",
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
),
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||||
Provider(
|
|
||||||
provider_id="tavily-search",
|
|
||||||
provider_type="remote::tavily-search",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="rag-runtime",
|
|
||||||
provider_type="inline::rag-runtime",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="model-context-protocol",
|
|
||||||
provider_type="remote::model-context-protocol",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
name = "meta-reference-gpu"
|
name = "meta-reference-gpu"
|
||||||
|
|
|
@ -3,37 +3,26 @@ distribution_spec:
|
||||||
description: Use NVIDIA NIM for running LLM inference, evaluation and safety
|
description: Use NVIDIA NIM for running LLM inference, evaluation and safety
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: nvidia
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::nvidia
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: faiss
|
- provider_type: inline::faiss
|
||||||
provider_type: inline::faiss
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: nvidia
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::nvidia
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: nvidia
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::nvidia
|
|
||||||
post_training:
|
post_training:
|
||||||
- provider_id: nvidia
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::nvidia
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: localfs
|
- provider_type: inline::localfs
|
||||||
provider_type: inline::localfs
|
- provider_type: remote::nvidia
|
||||||
- provider_id: nvidia
|
|
||||||
provider_type: remote::nvidia
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: rag-runtime
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: nvidia
|
image_name: nvidia
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
|
from llama_stack.distribution.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput
|
||||||
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
||||||
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
||||||
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
||||||
|
@ -17,65 +17,19 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": [
|
"inference": [BuildProvider(provider_type="remote::nvidia")],
|
||||||
Provider(
|
"vector_io": [BuildProvider(provider_type="inline::faiss")],
|
||||||
provider_id="nvidia",
|
"safety": [BuildProvider(provider_type="remote::nvidia")],
|
||||||
provider_type="remote::nvidia",
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
)
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
],
|
"eval": [BuildProvider(provider_type="remote::nvidia")],
|
||||||
"vector_io": [
|
"post_training": [BuildProvider(provider_type="remote::nvidia")],
|
||||||
Provider(
|
|
||||||
provider_id="faiss",
|
|
||||||
provider_type="inline::faiss",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"safety": [
|
|
||||||
Provider(
|
|
||||||
provider_id="nvidia",
|
|
||||||
provider_type="remote::nvidia",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"agents": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"telemetry": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"eval": [
|
|
||||||
Provider(
|
|
||||||
provider_id="nvidia",
|
|
||||||
provider_type="remote::nvidia",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"post_training": [Provider(provider_id="nvidia", provider_type="remote::nvidia", config={})],
|
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
provider_id="localfs",
|
BuildProvider(provider_type="remote::nvidia"),
|
||||||
provider_type="inline::localfs",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="nvidia",
|
|
||||||
provider_type="remote::nvidia",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
"scoring": [
|
|
||||||
Provider(
|
|
||||||
provider_id="basic",
|
|
||||||
provider_type="inline::basic",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"tool_runtime": [
|
|
||||||
Provider(
|
|
||||||
provider_id="rag-runtime",
|
|
||||||
provider_type="inline::rag-runtime",
|
|
||||||
)
|
|
||||||
],
|
],
|
||||||
|
"scoring": [BuildProvider(provider_type="inline::basic")],
|
||||||
|
"tool_runtime": [BuildProvider(provider_type="inline::rag-runtime")],
|
||||||
}
|
}
|
||||||
|
|
||||||
inference_provider = Provider(
|
inference_provider = Provider(
|
||||||
|
|
|
@ -3,56 +3,35 @@ distribution_spec:
|
||||||
description: Distribution for running open benchmarks
|
description: Distribution for running open benchmarks
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: openai
|
- provider_type: remote::openai
|
||||||
provider_type: remote::openai
|
- provider_type: remote::anthropic
|
||||||
- provider_id: anthropic
|
- provider_type: remote::gemini
|
||||||
provider_type: remote::anthropic
|
- provider_type: remote::groq
|
||||||
- provider_id: gemini
|
- provider_type: remote::together
|
||||||
provider_type: remote::gemini
|
|
||||||
- provider_id: groq
|
|
||||||
provider_type: remote::groq
|
|
||||||
- provider_id: together
|
|
||||||
provider_type: remote::together
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: sqlite-vec
|
- provider_type: inline::sqlite-vec
|
||||||
provider_type: inline::sqlite-vec
|
- provider_type: remote::chromadb
|
||||||
- provider_id: chromadb
|
- provider_type: remote::pgvector
|
||||||
provider_type: remote::chromadb
|
|
||||||
- provider_id: pgvector
|
|
||||||
provider_type: remote::pgvector
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: huggingface
|
- provider_type: remote::huggingface
|
||||||
provider_type: remote::huggingface
|
- provider_type: inline::localfs
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
- provider_type: inline::llm-as-judge
|
||||||
- provider_id: llm-as-judge
|
- provider_type: inline::braintrust
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
- provider_type: remote::model-context-protocol
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: open-benchmark
|
image_name: open-benchmark
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -9,6 +9,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
BenchmarkInput,
|
BenchmarkInput,
|
||||||
|
BuildProvider,
|
||||||
DatasetInput,
|
DatasetInput,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
Provider,
|
Provider,
|
||||||
|
@ -96,33 +97,30 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
inference_providers, available_models = get_inference_providers()
|
inference_providers, available_models = get_inference_providers()
|
||||||
providers = {
|
providers = {
|
||||||
"inference": inference_providers,
|
"inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in inference_providers],
|
||||||
"vector_io": [
|
"vector_io": [
|
||||||
Provider(provider_id="sqlite-vec", provider_type="inline::sqlite-vec"),
|
BuildProvider(provider_type="inline::sqlite-vec"),
|
||||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
BuildProvider(provider_type="remote::chromadb"),
|
||||||
Provider(provider_id="pgvector", provider_type="remote::pgvector"),
|
BuildProvider(provider_type="remote::pgvector"),
|
||||||
],
|
],
|
||||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"eval": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(provider_id="huggingface", provider_type="remote::huggingface"),
|
BuildProvider(provider_type="remote::huggingface"),
|
||||||
Provider(provider_id="localfs", provider_type="inline::localfs"),
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
],
|
],
|
||||||
"scoring": [
|
"scoring": [
|
||||||
Provider(provider_id="basic", provider_type="inline::basic"),
|
BuildProvider(provider_type="inline::basic"),
|
||||||
Provider(provider_id="llm-as-judge", provider_type="inline::llm-as-judge"),
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||||
Provider(provider_id="braintrust", provider_type="inline::braintrust"),
|
BuildProvider(provider_type="inline::braintrust"),
|
||||||
],
|
],
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
Provider(
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||||
provider_id="model-context-protocol",
|
|
||||||
provider_type="remote::model-context-protocol",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
name = "open-benchmark"
|
name = "open-benchmark"
|
||||||
|
|
|
@ -3,31 +3,21 @@ distribution_spec:
|
||||||
description: Quick start template for running Llama Stack with several popular providers
|
description: Quick start template for running Llama Stack with several popular providers
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: vllm-inference
|
- provider_type: remote::vllm
|
||||||
provider_type: remote::vllm
|
- provider_type: inline::sentence-transformers
|
||||||
- provider_id: sentence-transformers
|
|
||||||
provider_type: inline::sentence-transformers
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: chromadb
|
- provider_type: remote::chromadb
|
||||||
provider_type: remote::chromadb
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
- provider_type: remote::model-context-protocol
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: postgres-demo
|
image_name: postgres-demo
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
|
BuildProvider,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
Provider,
|
Provider,
|
||||||
ShieldInput,
|
ShieldInput,
|
||||||
|
@ -34,24 +35,19 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
providers = {
|
providers = {
|
||||||
"inference": inference_providers
|
"inference": [
|
||||||
+ [
|
BuildProvider(provider_type="remote::vllm"),
|
||||||
Provider(provider_id="sentence-transformers", provider_type="inline::sentence-transformers"),
|
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||||
],
|
],
|
||||||
"vector_io": [
|
"vector_io": [BuildProvider(provider_type="remote::chromadb")],
|
||||||
Provider(provider_id="chromadb", provider_type="remote::chromadb"),
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
],
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"safety": [Provider(provider_id="llama-guard", provider_type="inline::llama-guard")],
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"agents": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
|
||||||
"telemetry": [Provider(provider_id="meta-reference", provider_type="inline::meta-reference")],
|
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(provider_id="brave-search", provider_type="remote::brave-search"),
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
Provider(provider_id="tavily-search", provider_type="remote::tavily-search"),
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
Provider(provider_id="rag-runtime", provider_type="inline::rag-runtime"),
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
Provider(
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||||
provider_id="model-context-protocol",
|
|
||||||
provider_type="remote::model-context-protocol",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
name = "postgres-demo"
|
name = "postgres-demo"
|
||||||
|
|
|
@ -3,96 +3,56 @@ distribution_spec:
|
||||||
description: Quick start template for running Llama Stack with several popular providers
|
description: Quick start template for running Llama Stack with several popular providers
|
||||||
providers:
|
providers:
|
||||||
inference:
|
inference:
|
||||||
- provider_id: ${env.ENABLE_CEREBRAS:=__disabled__}
|
- provider_type: remote::cerebras
|
||||||
provider_type: remote::cerebras
|
- provider_type: remote::ollama
|
||||||
- provider_id: ${env.ENABLE_OLLAMA:=__disabled__}
|
- provider_type: remote::vllm
|
||||||
provider_type: remote::ollama
|
- provider_type: remote::tgi
|
||||||
- provider_id: ${env.ENABLE_VLLM:=__disabled__}
|
- provider_type: remote::hf::serverless
|
||||||
provider_type: remote::vllm
|
- provider_type: remote::hf::endpoint
|
||||||
- provider_id: ${env.ENABLE_TGI:=__disabled__}
|
- provider_type: remote::fireworks
|
||||||
provider_type: remote::tgi
|
- provider_type: remote::together
|
||||||
- provider_id: ${env.ENABLE_HF_SERVERLESS:=__disabled__}
|
- provider_type: remote::bedrock
|
||||||
provider_type: remote::hf::serverless
|
- provider_type: remote::databricks
|
||||||
- provider_id: ${env.ENABLE_HF_ENDPOINT:=__disabled__}
|
- provider_type: remote::nvidia
|
||||||
provider_type: remote::hf::endpoint
|
- provider_type: remote::runpod
|
||||||
- provider_id: ${env.ENABLE_FIREWORKS:=__disabled__}
|
- provider_type: remote::openai
|
||||||
provider_type: remote::fireworks
|
- provider_type: remote::anthropic
|
||||||
- provider_id: ${env.ENABLE_TOGETHER:=__disabled__}
|
- provider_type: remote::gemini
|
||||||
provider_type: remote::together
|
- provider_type: remote::groq
|
||||||
- provider_id: ${env.ENABLE_BEDROCK:=__disabled__}
|
- provider_type: remote::llama-openai-compat
|
||||||
provider_type: remote::bedrock
|
- provider_type: remote::sambanova
|
||||||
- provider_id: ${env.ENABLE_DATABRICKS:=__disabled__}
|
- provider_type: remote::passthrough
|
||||||
provider_type: remote::databricks
|
- provider_type: inline::sentence-transformers
|
||||||
- provider_id: ${env.ENABLE_NVIDIA:=__disabled__}
|
|
||||||
provider_type: remote::nvidia
|
|
||||||
- provider_id: ${env.ENABLE_RUNPOD:=__disabled__}
|
|
||||||
provider_type: remote::runpod
|
|
||||||
- provider_id: ${env.ENABLE_OPENAI:=__disabled__}
|
|
||||||
provider_type: remote::openai
|
|
||||||
- provider_id: ${env.ENABLE_ANTHROPIC:=__disabled__}
|
|
||||||
provider_type: remote::anthropic
|
|
||||||
- provider_id: ${env.ENABLE_GEMINI:=__disabled__}
|
|
||||||
provider_type: remote::gemini
|
|
||||||
- provider_id: ${env.ENABLE_GROQ:=__disabled__}
|
|
||||||
provider_type: remote::groq
|
|
||||||
- provider_id: ${env.ENABLE_LLAMA_OPENAI_COMPAT:=__disabled__}
|
|
||||||
provider_type: remote::llama-openai-compat
|
|
||||||
- provider_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
|
|
||||||
provider_type: remote::sambanova
|
|
||||||
- provider_id: ${env.ENABLE_PASSTHROUGH:=__disabled__}
|
|
||||||
provider_type: remote::passthrough
|
|
||||||
- provider_id: sentence-transformers
|
|
||||||
provider_type: inline::sentence-transformers
|
|
||||||
vector_io:
|
vector_io:
|
||||||
- provider_id: ${env.ENABLE_FAISS:=faiss}
|
- provider_type: inline::faiss
|
||||||
provider_type: inline::faiss
|
- provider_type: inline::sqlite-vec
|
||||||
- provider_id: ${env.ENABLE_SQLITE_VEC:=__disabled__}
|
- provider_type: inline::milvus
|
||||||
provider_type: inline::sqlite-vec
|
- provider_type: remote::chromadb
|
||||||
- provider_id: ${env.ENABLE_MILVUS:=__disabled__}
|
- provider_type: remote::pgvector
|
||||||
provider_type: inline::milvus
|
|
||||||
- provider_id: ${env.ENABLE_CHROMADB:=__disabled__}
|
|
||||||
provider_type: remote::chromadb
|
|
||||||
- provider_id: ${env.ENABLE_PGVECTOR:=__disabled__}
|
|
||||||
provider_type: remote::pgvector
|
|
||||||
files:
|
files:
|
||||||
- provider_id: localfs
|
- provider_type: inline::localfs
|
||||||
provider_type: inline::localfs
|
|
||||||
safety:
|
safety:
|
||||||
- provider_id: llama-guard
|
- provider_type: inline::llama-guard
|
||||||
provider_type: inline::llama-guard
|
|
||||||
agents:
|
agents:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
telemetry:
|
telemetry:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
post_training:
|
post_training:
|
||||||
- provider_id: huggingface
|
- provider_type: inline::huggingface
|
||||||
provider_type: inline::huggingface
|
|
||||||
eval:
|
eval:
|
||||||
- provider_id: meta-reference
|
- provider_type: inline::meta-reference
|
||||||
provider_type: inline::meta-reference
|
|
||||||
datasetio:
|
datasetio:
|
||||||
- provider_id: huggingface
|
- provider_type: remote::huggingface
|
||||||
provider_type: remote::huggingface
|
- provider_type: inline::localfs
|
||||||
- provider_id: localfs
|
|
||||||
provider_type: inline::localfs
|
|
||||||
scoring:
|
scoring:
|
||||||
- provider_id: basic
|
- provider_type: inline::basic
|
||||||
provider_type: inline::basic
|
- provider_type: inline::llm-as-judge
|
||||||
- provider_id: llm-as-judge
|
- provider_type: inline::braintrust
|
||||||
provider_type: inline::llm-as-judge
|
|
||||||
- provider_id: braintrust
|
|
||||||
provider_type: inline::braintrust
|
|
||||||
tool_runtime:
|
tool_runtime:
|
||||||
- provider_id: brave-search
|
- provider_type: remote::brave-search
|
||||||
provider_type: remote::brave-search
|
- provider_type: remote::tavily-search
|
||||||
- provider_id: tavily-search
|
- provider_type: inline::rag-runtime
|
||||||
provider_type: remote::tavily-search
|
- provider_type: remote::model-context-protocol
|
||||||
- provider_id: rag-runtime
|
|
||||||
provider_type: inline::rag-runtime
|
|
||||||
- provider_id: model-context-protocol
|
|
||||||
provider_type: remote::model-context-protocol
|
|
||||||
image_type: conda
|
image_type: conda
|
||||||
image_name: starter
|
image_name: starter
|
||||||
additional_pip_packages:
|
additional_pip_packages:
|
||||||
|
|
|
@ -9,6 +9,7 @@ from typing import Any
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import (
|
from llama_stack.distribution.datatypes import (
|
||||||
|
BuildProvider,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
Provider,
|
Provider,
|
||||||
ProviderSpec,
|
ProviderSpec,
|
||||||
|
@ -213,131 +214,38 @@ def get_safety_models_for_providers(providers: list[Provider]) -> dict[str, list
|
||||||
|
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
remote_inference_providers, available_models = get_remote_inference_providers()
|
remote_inference_providers, available_models = get_remote_inference_providers()
|
||||||
|
|
||||||
name = "starter"
|
name = "starter"
|
||||||
|
# For build config, use BuildProvider with only provider_type and module
|
||||||
vector_io_providers = [
|
|
||||||
Provider(
|
|
||||||
provider_id="${env.ENABLE_FAISS:=faiss}",
|
|
||||||
provider_type="inline::faiss",
|
|
||||||
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="${env.ENABLE_SQLITE_VEC:=__disabled__}",
|
|
||||||
provider_type="inline::sqlite-vec",
|
|
||||||
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="${env.ENABLE_MILVUS:=__disabled__}",
|
|
||||||
provider_type="inline::milvus",
|
|
||||||
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="${env.ENABLE_CHROMADB:=__disabled__}",
|
|
||||||
provider_type="remote::chromadb",
|
|
||||||
config=ChromaVectorIOConfig.sample_run_config(
|
|
||||||
f"~/.llama/distributions/{name}/",
|
|
||||||
url="${env.CHROMADB_URL:=}",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="${env.ENABLE_PGVECTOR:=__disabled__}",
|
|
||||||
provider_type="remote::pgvector",
|
|
||||||
config=PGVectorVectorIOConfig.sample_run_config(
|
|
||||||
f"~/.llama/distributions/{name}",
|
|
||||||
db="${env.PGVECTOR_DB:=}",
|
|
||||||
user="${env.PGVECTOR_USER:=}",
|
|
||||||
password="${env.PGVECTOR_PASSWORD:=}",
|
|
||||||
),
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
providers = {
|
providers = {
|
||||||
"inference": remote_inference_providers
|
"inference": [BuildProvider(provider_type=p.provider_type, module=p.module) for p in remote_inference_providers]
|
||||||
+ [
|
+ [BuildProvider(provider_type="inline::sentence-transformers")],
|
||||||
Provider(
|
"vector_io": [
|
||||||
provider_id="sentence-transformers",
|
BuildProvider(provider_type="inline::faiss"),
|
||||||
provider_type="inline::sentence-transformers",
|
BuildProvider(provider_type="inline::sqlite-vec"),
|
||||||
)
|
BuildProvider(provider_type="inline::milvus"),
|
||||||
],
|
BuildProvider(provider_type="remote::chromadb"),
|
||||||
"vector_io": vector_io_providers,
|
BuildProvider(provider_type="remote::pgvector"),
|
||||||
"files": [
|
|
||||||
Provider(
|
|
||||||
provider_id="localfs",
|
|
||||||
provider_type="inline::localfs",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"safety": [
|
|
||||||
Provider(
|
|
||||||
provider_id="llama-guard",
|
|
||||||
provider_type="inline::llama-guard",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"agents": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"telemetry": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"post_training": [
|
|
||||||
Provider(
|
|
||||||
provider_id="huggingface",
|
|
||||||
provider_type="inline::huggingface",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"eval": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
],
|
||||||
|
"files": [BuildProvider(provider_type="inline::localfs")],
|
||||||
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"post_training": [BuildProvider(provider_type="inline::huggingface")],
|
||||||
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::huggingface"),
|
||||||
provider_id="huggingface",
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
provider_type="remote::huggingface",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="localfs",
|
|
||||||
provider_type="inline::localfs",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"scoring": [
|
"scoring": [
|
||||||
Provider(
|
BuildProvider(provider_type="inline::basic"),
|
||||||
provider_id="basic",
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||||
provider_type="inline::basic",
|
BuildProvider(provider_type="inline::braintrust"),
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="llm-as-judge",
|
|
||||||
provider_type="inline::llm-as-judge",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="braintrust",
|
|
||||||
provider_type="inline::braintrust",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
provider_id="brave-search",
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
provider_type="remote::brave-search",
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
),
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||||
Provider(
|
|
||||||
provider_id="tavily-search",
|
|
||||||
provider_type="remote::tavily-search",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="rag-runtime",
|
|
||||||
provider_type="inline::rag-runtime",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="model-context-protocol",
|
|
||||||
provider_type="remote::model-context-protocol",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
files_provider = Provider(
|
files_provider = Provider(
|
||||||
|
@ -392,7 +300,41 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
"inference": remote_inference_providers + [embedding_provider],
|
"inference": remote_inference_providers + [embedding_provider],
|
||||||
"vector_io": vector_io_providers,
|
"vector_io": [
|
||||||
|
Provider(
|
||||||
|
provider_id="${env.ENABLE_FAISS:=faiss}",
|
||||||
|
provider_type="inline::faiss",
|
||||||
|
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||||
|
),
|
||||||
|
Provider(
|
||||||
|
provider_id="${env.ENABLE_SQLITE_VEC:=__disabled__}",
|
||||||
|
provider_type="inline::sqlite-vec",
|
||||||
|
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||||
|
),
|
||||||
|
Provider(
|
||||||
|
provider_id="${env.ENABLE_MILVUS:=__disabled__}",
|
||||||
|
provider_type="inline::milvus",
|
||||||
|
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
||||||
|
),
|
||||||
|
Provider(
|
||||||
|
provider_id="${env.ENABLE_CHROMADB:=__disabled__}",
|
||||||
|
provider_type="remote::chromadb",
|
||||||
|
config=ChromaVectorIOConfig.sample_run_config(
|
||||||
|
f"~/.llama/distributions/{name}/",
|
||||||
|
url="${env.CHROMADB_URL:=}",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
Provider(
|
||||||
|
provider_id="${env.ENABLE_PGVECTOR:=__disabled__}",
|
||||||
|
provider_type="remote::pgvector",
|
||||||
|
config=PGVectorVectorIOConfig.sample_run_config(
|
||||||
|
f"~/.llama/distributions/{name}",
|
||||||
|
db="${env.PGVECTOR_DB:=}",
|
||||||
|
user="${env.PGVECTOR_USER:=}",
|
||||||
|
password="${env.PGVECTOR_PASSWORD:=}",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
],
|
||||||
"files": [files_provider],
|
"files": [files_provider],
|
||||||
"post_training": [post_training_provider],
|
"post_training": [post_training_provider],
|
||||||
},
|
},
|
||||||
|
|
|
@ -19,6 +19,7 @@ from llama_stack.distribution.datatypes import (
|
||||||
Api,
|
Api,
|
||||||
BenchmarkInput,
|
BenchmarkInput,
|
||||||
BuildConfig,
|
BuildConfig,
|
||||||
|
BuildProvider,
|
||||||
DatasetInput,
|
DatasetInput,
|
||||||
DistributionSpec,
|
DistributionSpec,
|
||||||
ModelInput,
|
ModelInput,
|
||||||
|
@ -183,7 +184,7 @@ class RunConfigSettings(BaseModel):
|
||||||
def run_config(
|
def run_config(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
providers: dict[str, list[Provider]],
|
providers: dict[str, list[BuildProvider]],
|
||||||
container_image: str | None = None,
|
container_image: str | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
provider_registry = get_provider_registry()
|
provider_registry = get_provider_registry()
|
||||||
|
@ -199,7 +200,7 @@ class RunConfigSettings(BaseModel):
|
||||||
api = Api(api_str)
|
api = Api(api_str)
|
||||||
if provider.provider_type not in provider_registry[api]:
|
if provider.provider_type not in provider_registry[api]:
|
||||||
raise ValueError(f"Unknown provider type: {provider.provider_type} for API: {api_str}")
|
raise ValueError(f"Unknown provider type: {provider.provider_type} for API: {api_str}")
|
||||||
|
provider_id = provider.provider_type.split("::")[-1]
|
||||||
config_class = provider_registry[api][provider.provider_type].config_class
|
config_class = provider_registry[api][provider.provider_type].config_class
|
||||||
assert config_class is not None, (
|
assert config_class is not None, (
|
||||||
f"No config class for provider type: {provider.provider_type} for API: {api_str}"
|
f"No config class for provider type: {provider.provider_type} for API: {api_str}"
|
||||||
|
@ -210,10 +211,14 @@ class RunConfigSettings(BaseModel):
|
||||||
config = config_class.sample_run_config(__distro_dir__=f"~/.llama/distributions/{name}")
|
config = config_class.sample_run_config(__distro_dir__=f"~/.llama/distributions/{name}")
|
||||||
else:
|
else:
|
||||||
config = {}
|
config = {}
|
||||||
|
# BuildProvider does not have a config attribute; skip assignment
|
||||||
provider.config = config
|
provider_configs[api_str].append(
|
||||||
# Convert Provider object to dict for YAML serialization
|
Provider(
|
||||||
provider_configs[api_str].append(provider.model_dump(exclude_none=True))
|
provider_id=provider_id,
|
||||||
|
provider_type=provider.provider_type,
|
||||||
|
config=config,
|
||||||
|
).model_dump(exclude_none=True)
|
||||||
|
)
|
||||||
# Get unique set of APIs from providers
|
# Get unique set of APIs from providers
|
||||||
apis = sorted(providers.keys())
|
apis = sorted(providers.keys())
|
||||||
|
|
||||||
|
@ -257,7 +262,8 @@ class DistributionTemplate(BaseModel):
|
||||||
description: str
|
description: str
|
||||||
distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]
|
distro_type: Literal["self_hosted", "remote_hosted", "ondevice"]
|
||||||
|
|
||||||
providers: dict[str, list[Provider]]
|
# Now uses BuildProvider for build config, not Provider
|
||||||
|
providers: dict[str, list[BuildProvider]]
|
||||||
run_configs: dict[str, RunConfigSettings]
|
run_configs: dict[str, RunConfigSettings]
|
||||||
template_path: Path | None = None
|
template_path: Path | None = None
|
||||||
|
|
||||||
|
@ -295,11 +301,9 @@ class DistributionTemplate(BaseModel):
|
||||||
for api, providers in self.providers.items():
|
for api, providers in self.providers.items():
|
||||||
build_providers[api] = []
|
build_providers[api] = []
|
||||||
for provider in providers:
|
for provider in providers:
|
||||||
# Create a minimal provider object with only essential build information
|
# Create a minimal build provider object with only essential build information
|
||||||
build_provider = Provider(
|
build_provider = BuildProvider(
|
||||||
provider_id=provider.provider_id,
|
|
||||||
provider_type=provider.provider_type,
|
provider_type=provider.provider_type,
|
||||||
config={}, # Empty config for build
|
|
||||||
module=provider.module,
|
module=provider.module,
|
||||||
)
|
)
|
||||||
build_providers[api].append(build_provider)
|
build_providers[api].append(build_provider)
|
||||||
|
@ -323,50 +327,52 @@ class DistributionTemplate(BaseModel):
|
||||||
providers_str = ", ".join(f"`{p.provider_type}`" for p in providers)
|
providers_str = ", ".join(f"`{p.provider_type}`" for p in providers)
|
||||||
providers_table += f"| {api} | {providers_str} |\n"
|
providers_table += f"| {api} | {providers_str} |\n"
|
||||||
|
|
||||||
template = self.template_path.read_text()
|
if self.template_path is not None:
|
||||||
comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
|
template = self.template_path.read_text()
|
||||||
orphantext = "---\norphan: true\n---\n"
|
comment = "<!-- This file was auto-generated by distro_codegen.py, please edit source -->\n"
|
||||||
|
orphantext = "---\norphan: true\n---\n"
|
||||||
|
|
||||||
if template.startswith(orphantext):
|
if template.startswith(orphantext):
|
||||||
template = template.replace(orphantext, orphantext + comment)
|
template = template.replace(orphantext, orphantext + comment)
|
||||||
else:
|
else:
|
||||||
template = comment + template
|
template = comment + template
|
||||||
|
|
||||||
# Render template with rich-generated table
|
# Render template with rich-generated table
|
||||||
env = jinja2.Environment(
|
env = jinja2.Environment(
|
||||||
trim_blocks=True,
|
trim_blocks=True,
|
||||||
lstrip_blocks=True,
|
lstrip_blocks=True,
|
||||||
# NOTE: autoescape is required to prevent XSS attacks
|
# NOTE: autoescape is required to prevent XSS attacks
|
||||||
autoescape=True,
|
autoescape=True,
|
||||||
)
|
)
|
||||||
template = env.from_string(template)
|
template = env.from_string(template)
|
||||||
|
|
||||||
default_models = []
|
default_models = []
|
||||||
if self.available_models_by_provider:
|
if self.available_models_by_provider:
|
||||||
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
|
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
|
||||||
for provider_id, model_entries in self.available_models_by_provider.items():
|
for provider_id, model_entries in self.available_models_by_provider.items():
|
||||||
for model_entry in model_entries:
|
for model_entry in model_entries:
|
||||||
doc_parts = []
|
doc_parts = []
|
||||||
if model_entry.aliases:
|
if model_entry.aliases:
|
||||||
doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
|
doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
|
||||||
if has_multiple_providers:
|
if has_multiple_providers:
|
||||||
doc_parts.append(f"provider: {provider_id}")
|
doc_parts.append(f"provider: {provider_id}")
|
||||||
|
|
||||||
default_models.append(
|
default_models.append(
|
||||||
DefaultModel(
|
DefaultModel(
|
||||||
model_id=model_entry.provider_model_id,
|
model_id=model_entry.provider_model_id,
|
||||||
doc_string=(f"({' -- '.join(doc_parts)})" if doc_parts else ""),
|
doc_string=(f"({' -- '.join(doc_parts)})" if doc_parts else ""),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
return template.render(
|
return template.render(
|
||||||
name=self.name,
|
name=self.name,
|
||||||
description=self.description,
|
description=self.description,
|
||||||
providers=self.providers,
|
providers=self.providers,
|
||||||
providers_table=providers_table,
|
providers_table=providers_table,
|
||||||
run_config_env_vars=self.run_config_env_vars,
|
run_config_env_vars=self.run_config_env_vars,
|
||||||
default_models=default_models,
|
default_models=default_models,
|
||||||
)
|
)
|
||||||
|
return ""
|
||||||
|
|
||||||
def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
|
def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
|
||||||
def enum_representer(dumper, data):
|
def enum_representer(dumper, data):
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
|
from llama_stack.distribution.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput
|
||||||
from llama_stack.providers.inline.inference.sentence_transformers import (
|
from llama_stack.providers.inline.inference.sentence_transformers import (
|
||||||
SentenceTransformersInferenceConfig,
|
SentenceTransformersInferenceConfig,
|
||||||
)
|
)
|
||||||
|
@ -19,86 +19,28 @@ from llama_stack.templates.template import DistributionTemplate, RunConfigSettin
|
||||||
def get_distribution_template() -> DistributionTemplate:
|
def get_distribution_template() -> DistributionTemplate:
|
||||||
providers = {
|
providers = {
|
||||||
"inference": [
|
"inference": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::watsonx"),
|
||||||
provider_id="watsonx",
|
BuildProvider(provider_type="inline::sentence-transformers"),
|
||||||
provider_type="remote::watsonx",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="sentence-transformers",
|
|
||||||
provider_type="inline::sentence-transformers",
|
|
||||||
),
|
|
||||||
],
|
|
||||||
"vector_io": [
|
|
||||||
Provider(
|
|
||||||
provider_id="faiss",
|
|
||||||
provider_type="inline::faiss",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"safety": [
|
|
||||||
Provider(
|
|
||||||
provider_id="llama-guard",
|
|
||||||
provider_type="inline::llama-guard",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"agents": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"telemetry": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
|
||||||
"eval": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
)
|
|
||||||
],
|
],
|
||||||
|
"vector_io": [BuildProvider(provider_type="inline::faiss")],
|
||||||
|
"safety": [BuildProvider(provider_type="inline::llama-guard")],
|
||||||
|
"agents": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"telemetry": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
|
"eval": [BuildProvider(provider_type="inline::meta-reference")],
|
||||||
"datasetio": [
|
"datasetio": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::huggingface"),
|
||||||
provider_id="huggingface",
|
BuildProvider(provider_type="inline::localfs"),
|
||||||
provider_type="remote::huggingface",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="localfs",
|
|
||||||
provider_type="inline::localfs",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"scoring": [
|
"scoring": [
|
||||||
Provider(
|
BuildProvider(provider_type="inline::basic"),
|
||||||
provider_id="basic",
|
BuildProvider(provider_type="inline::llm-as-judge"),
|
||||||
provider_type="inline::basic",
|
BuildProvider(provider_type="inline::braintrust"),
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="llm-as-judge",
|
|
||||||
provider_type="inline::llm-as-judge",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="braintrust",
|
|
||||||
provider_type="inline::braintrust",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
"tool_runtime": [
|
"tool_runtime": [
|
||||||
Provider(
|
BuildProvider(provider_type="remote::brave-search"),
|
||||||
provider_id="brave-search",
|
BuildProvider(provider_type="remote::tavily-search"),
|
||||||
provider_type="remote::brave-search",
|
BuildProvider(provider_type="inline::rag-runtime"),
|
||||||
),
|
BuildProvider(provider_type="remote::model-context-protocol"),
|
||||||
Provider(
|
|
||||||
provider_id="tavily-search",
|
|
||||||
provider_type="remote::tavily-search",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="rag-runtime",
|
|
||||||
provider_type="inline::rag-runtime",
|
|
||||||
),
|
|
||||||
Provider(
|
|
||||||
provider_id="model-context-protocol",
|
|
||||||
provider_type="remote::model-context-protocol",
|
|
||||||
),
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue