all distros

Xi Yan 2024-12-03 20:49:30 -08:00
parent a097bfa761
commit 7103892f54
16 changed files with 79 additions and 62 deletions
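
The pattern applied in every template below is the same: bind the distro name to a variable, declare an explicit inline faiss memory provider rooted under distributions/<name>, and pin it as the only memory provider through provider_overrides; the generated run.yaml files correspondingly drop their chromadb and pgvector memory entries. The sketch below consolidates that repeated pattern for readability only; "my-distro" is a placeholder, not one of the templates in this commit.

# Illustrative consolidation of the pattern applied in each template below;
# "my-distro" is a placeholder name, not one of the templates in this commit.
from llama_stack.distribution.datatypes import Provider
from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
from llama_stack.templates.template import RunConfigSettings

name = "my-distro"  # each real template binds its own name (bedrock, tgi, ...)

# Explicit inline faiss memory provider, with its kvstore rooted under the
# distro directory, exactly as declared in the templates below.
memory_provider = Provider(
    provider_id="faiss",
    provider_type="inline::faiss",
    config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
)

# The template then pins this provider as the only "memory" provider in the
# generated run.yaml via provider_overrides.
run_configs = {
    "run.yaml": RunConfigSettings(
        provider_overrides={
            "memory": [memory_provider],
        },
    ),
}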

@@ -6,6 +6,9 @@
 from pathlib import Path
 
+from llama_stack.distribution.datatypes import Provider
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -20,9 +23,15 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "bedrock"
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     return DistributionTemplate(
-        name="bedrock",
+        name=name,
         distro_type="self_hosted",
         description="Use AWS Bedrock for running LLM inference and safety",
         docker_image=None,
@@ -30,7 +39,11 @@ def get_distribution_template() -> DistributionTemplate:
         providers=providers,
         default_models=[],
         run_configs={
-            "run.yaml": RunConfigSettings(),
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "memory": [memory_provider],
+                },
+            ),
         },
         run_config_env_vars={
             "LLAMASTACK_PORT": (

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: bedrock
     provider_type: remote::bedrock
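
For orientation: the faiss entry retained above is the one generated from FaissImplConfig.sample_run_config(f"distributions/{name}") in the template change. Reconstructed from the YAML lines kept in this hunk (not from the provider's own schema), the generated memory provider is roughly the following:

# Rough shape of the remaining faiss memory provider, reconstructed from the
# retained run.yaml lines above; this mirrors the YAML and is not copied from
# FaissImplConfig itself.
faiss_memory_provider = {
    "provider_id": "faiss",
    "provider_type": "inline::faiss",
    "config": {
        "kvstore": {
            "type": "sqlite",
            "namespace": None,
            "db_path": "${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/faiss_store.db",
        },
    },
}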

@@ -9,6 +9,7 @@ from pathlib import Path
 from llama_models.sku_list import all_registered_models
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.fireworks import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.fireworks import MODEL_ALIASES
@@ -27,11 +28,18 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "fireworks"
     inference_provider = Provider(
         provider_id="fireworks",
         provider_type="remote::fireworks",
         config=FireworksImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     core_model_to_hf_repo = {
         m.descriptor(): m.huggingface_repo for m in all_registered_models()
@@ -45,7 +53,7 @@ def get_distribution_template() -> DistributionTemplate:
     ]
 
     return DistributionTemplate(
-        name="fireworks",
+        name=name,
         distro_type="self_hosted",
         description="Use Fireworks.AI for running LLM inference",
         docker_image=None,
@@ -56,6 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=default_models,
                 default_shields=[ShieldInput(shield_id="meta-llama/Llama-Guard-3-8B")],

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceEndpointImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -20,12 +21,17 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "hf-endpoint"
     inference_provider = Provider(
         provider_id="hf-endpoint",
         provider_type="remote::hf::endpoint",
         config=InferenceEndpointImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +43,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-endpoint",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-endpoint}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import InferenceAPIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -21,11 +22,17 @@ def get_distribution_template() -> DistributionTemplate:
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
+    name = "hf-serverless"
     inference_provider = Provider(
         provider_id="hf-serverless",
         provider_type="remote::hf::serverless",
         config=InferenceAPIImplConfig.sample_run_config(),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -37,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="hf-serverless",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) Hugging Face Inference Endpoint for running LLM inference",
         docker_image=None,
@@ -48,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -26,12 +26,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/hf-serverless}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference",
@@ -33,6 +34,11 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -44,7 +50,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="meta-reference-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -54,6 +60,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),
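
Several config strings in these templates use ${env.NAME} and ${env.NAME:default} placeholders, for example ${env.INFERENCE_CHECKPOINT_DIR:null} above and the ${env.SQLITE_STORE_DIR:...} paths in the run.yaml files. As a minimal sketch of that placeholder syntax only, and not llama_stack's actual substitution code, such a value could be resolved like this:

# Minimal sketch of resolving a "${env.NAME:default}" placeholder.
# This is NOT llama_stack's implementation; it only illustrates the syntax
# used in the config strings above.
import os
import re

_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def resolve_env_placeholders(value: str) -> str:
    def _sub(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        # Fall back to the default (after the colon) when the variable is unset.
        return os.environ.get(name, default if default is not None else "")
    return _ENV_PATTERN.sub(_sub, value)

# Example: prints the default store dir when SQLITE_STORE_DIR is unset.
print(resolve_env_placeholders(
    "${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db"
))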

@@ -27,12 +27,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -10,6 +10,7 @@ from llama_stack.distribution.datatypes import ModelInput, Provider
 from llama_stack.providers.inline.inference.meta_reference import (
     MetaReferenceQuantizedInferenceConfig,
 )
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -24,7 +25,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "meta-reference-quantized-gpu"
     inference_provider = Provider(
         provider_id="meta-reference-inference",
         provider_type="inline::meta-reference-quantized",
@@ -33,13 +34,18 @@ def get_distribution_template() -> DistributionTemplate:
             checkpoint_dir="${env.INFERENCE_CHECKPOINT_DIR:null}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
         provider_id="meta-reference-inference",
     )
 
     return DistributionTemplate(
-        name="meta-reference-quantized-gpu",
+        name=name,
         distro_type="self_hosted",
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -49,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -29,12 +29,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/meta-reference-quantized-gpu}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -24,12 +24,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/remote-vllm}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.vllm import VLLMInferenceAdapterConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -19,7 +20,7 @@ def get_distribution_template() -> DistributionTemplate:
         "agents": ["inline::meta-reference"],
         "telemetry": ["inline::meta-reference"],
     }
-
+    name = "remote-vllm"
     inference_provider = Provider(
         provider_id="vllm-inference",
         provider_type="remote::vllm",
@@ -27,6 +28,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.VLLM_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -38,7 +44,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="remote-vllm",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
@@ -48,6 +54,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
             ),

@@ -25,12 +25,6 @@ providers:
         type: sqlite
         namespace: null
         db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/tgi}/faiss_store.db
-  - provider_id: chromadb
-    provider_type: remote::chromadb
-    config: {}
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config: {}
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -7,6 +7,7 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.inline.memory.faiss.config import FaissImplConfig
 from llama_stack.providers.remote.inference.tgi import TGIImplConfig
 from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
@@ -22,7 +23,7 @@ def get_distribution_template() -> DistributionTemplate:
         "datasetio": ["remote::huggingface", "inline::localfs"],
         "scoring": ["inline::basic", "inline::llm-as-judge", "inline::braintrust"],
     }
-
+    name = "tgi"
     inference_provider = Provider(
         provider_id="tgi-inference",
         provider_type="remote::tgi",
@@ -30,6 +31,11 @@ def get_distribution_template() -> DistributionTemplate:
             url="${env.TGI_URL}",
         ),
     )
+    memory_provider = Provider(
+        provider_id="faiss",
+        provider_type="inline::faiss",
+        config=FaissImplConfig.sample_run_config(f"distributions/{name}"),
+    )
 
     inference_model = ModelInput(
         model_id="${env.INFERENCE_MODEL}",
@@ -41,7 +47,7 @@ def get_distribution_template() -> DistributionTemplate:
     )
 
     return DistributionTemplate(
-        name="tgi",
+        name=name,
         distro_type="self_hosted",
         description="Use (an external) TGI server for running LLM inference",
         docker_image=None,
@@ -52,6 +58,7 @@ def get_distribution_template() -> DistributionTemplate:
             "run.yaml": RunConfigSettings(
                 provider_overrides={
                     "inference": [inference_provider],
+                    "memory": [memory_provider],
                 },
                 default_models=[inference_model],
            ),
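
As a quick manual check after regenerating the templates, one can load a generated run.yaml and confirm that its memory section now lists only the inline faiss provider. This is a hypothetical verification snippet: the run.yaml path is a placeholder, and it assumes only PyYAML plus the providers/memory layout visible in the hunks above.

# Hypothetical sanity check on a regenerated run.yaml (path is a placeholder;
# point it at the generated run.yaml in your checkout).
import yaml  # PyYAML

with open("run.yaml") as f:
    run_config = yaml.safe_load(f)

memory_providers = run_config["providers"]["memory"]
# After this change, faiss should be the only memory provider left.
assert [p["provider_type"] for p in memory_providers] == ["inline::faiss"]
print(memory_providers[0]["config"]["kvstore"]["db_path"])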