fix: Safety in starter (#2731)

- Fireworks and Together no longer serve the Llama Guard 3 8B model
- Safety therefore needs to default to Ollama
- The existing safety-shields logic was incorrect: the shield_id was set to the provider id, which produced duplicate shield ids
- Followed logic similar to how models are registered

Note: this may seem a bit over-engineered, but it can now be extended to other
providers and fits into the overall mechanism of using env vars to manage the
starter distribution (see the sketch below).
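
As an illustration only (not code from this PR), here is a minimal sketch of the intended behavior: safety models are keyed by an env-var-gated provider id, and shield ids are prefixed with the provider id only when the same model id shows up under more than one provider. `build_shields` and the simplified `ShieldInput` below are hypothetical stand-ins; the real helpers (`get_safety_models_for_providers`, `get_shield_registry`) are in the diff further down.

```python
from dataclasses import dataclass


@dataclass
class ShieldInput:
    # Simplified stand-in for llama_stack.distribution.datatypes.ShieldInput
    shield_id: str
    provider_shield_id: str


def build_shields(safety_models_by_provider: dict[str, list[str]]) -> list[ShieldInput]:
    # Step 1: detect shield-id conflicts across providers (the old bug: shield_id
    # was the provider id itself, so multiple shields for one provider collided).
    seen: set[str] = set()
    conflict = False
    for model_ids in safety_models_by_provider.values():
        for model_id in model_ids:
            if model_id in seen:
                conflict = True
            seen.add(model_id)

    # Step 2: register one shield per safety model; prefix with the provider id
    # only if the bare model id would be ambiguous.
    shields = []
    for provider_id, model_ids in safety_models_by_provider.items():
        for model_id in model_ids:
            shield_id = f"{provider_id}/{model_id}" if conflict else model_id
            shields.append(ShieldInput(shield_id=shield_id, provider_shield_id=f"{provider_id}/{model_id}"))
    return shields


if __name__ == "__main__":
    # With the starter defaults, only ollama contributes a SAFETY_MODEL-driven entry.
    print(build_shields({"${env.ENABLE_OLLAMA:=__disabled__}": ["${env.SAFETY_MODEL:=__disabled__}"]}))
```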

### How to test 
```
ENABLE_OLLAMA=ollama ENABLE_FIREWORKS=fireworks SAFETY_MODEL=llama-guard3:1b pytest -s -v tests/integration/ --stack-config starter -k 'not(supervised_fine_tune or builtin_tool_code or safety_with_image or code_interpreter_for or rag_and_code or truncation or register_and_unregister)' --text-model fireworks/meta-llama/Llama-3.3-70B-Instruct --vision-model fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct --safety-shield llama-guard3:1b --embedding-model all-MiniLM-L6-v2
```

### Related but not obvious in this PR 
In the llama-stack-ops repo, we run tests before publishing packages and
Docker containers. The actions in that repo were using the fireworks / together
distros (which no longer exist).

Those actions need to be updated to run with the `starter` distribution and to
use `ollama` specifically for safety, along the lines of the command below.
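
For example, the ops job could invoke something like the following (illustrative only; the exact flags and model ids used in llama-stack-ops may differ):
```
ENABLE_OLLAMA=ollama SAFETY_MODEL=llama-guard3:1b pytest -s -v tests/integration/ \
  --stack-config starter \
  --text-model ollama/llama3.2:3b-instruct-fp16 \
  --safety-shield llama-guard3:1b \
  --embedding-model all-MiniLM-L6-v2
```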
Commit 6b8a8c1be9 (parent 6ad22c209f)
Hardik Shah, 2025-07-14 15:07:40 -07:00, committed by GitHub
9 changed files with 104 additions and 195 deletions


@@ -89,7 +89,7 @@ jobs:
             -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
             --text-model="ollama/llama3.2:3b-instruct-fp16" \
             --embedding-model=all-MiniLM-L6-v2 \
-            --safety-shield=ollama \
+            --safety-shield=$SAFETY_MODEL \
             --color=yes \
             --capture=tee-sys | tee pytest-${{ matrix.test-type }}.log


@@ -12,6 +12,19 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_model_entry,
 )
 
+SAFETY_MODELS_ENTRIES = [
+    # The Llama Guard models don't have their full fp16 versions
+    # so we are going to alias their default version to the canonical SKU
+    build_hf_repo_model_entry(
+        "llama-guard3:8b",
+        CoreModelId.llama_guard_3_8b.value,
+    ),
+    build_hf_repo_model_entry(
+        "llama-guard3:1b",
+        CoreModelId.llama_guard_3_1b.value,
+    ),
+]
+
 MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1:8b-instruct-fp16",
@@ -73,16 +86,6 @@ MODEL_ENTRIES = [
         "llama3.3:70b",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
-    # The Llama Guard models don't have their full fp16 versions
-    # so we are going to alias their default version to the canonical SKU
-    build_hf_repo_model_entry(
-        "llama-guard3:8b",
-        CoreModelId.llama_guard_3_8b.value,
-    ),
-    build_hf_repo_model_entry(
-        "llama-guard3:1b",
-        CoreModelId.llama_guard_3_1b.value,
-    ),
     ProviderModelEntry(
         provider_model_id="all-minilm:l6-v2",
         aliases=["all-minilm"],
@@ -100,4 +103,4 @@ MODEL_ENTRIES = [
             "context_length": 8192,
         },
     ),
-]
+] + SAFETY_MODELS_ENTRIES


@@ -68,7 +68,7 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
-    default_models = get_model_registry(available_models)
+    default_models, _ = get_model_registry(available_models)
     return DistributionTemplate(
         name="nvidia",
         distro_type="self_hosted",


@@ -146,7 +146,8 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
-    default_models = get_model_registry(available_models) + [
+    models, _ = get_model_registry(available_models)
+    default_models = models + [
         ModelInput(
             model_id="meta-llama/Llama-3.3-70B-Instruct",
             provider_id="groq",


@@ -1171,24 +1171,8 @@ models:
   provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers}
   model_type: embedding
 shields:
-- shield_id: ${env.ENABLE_OLLAMA:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=llama-guard3:1b}
-- shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-8b}
-- shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_FIREWORKS:=__disabled__}/${env.SAFETY_MODEL:=accounts/fireworks/models/llama-guard-3-11b-vision}
-- shield_id: ${env.ENABLE_TOGETHER:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_TOGETHER:=__disabled__}/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-8B}
-- shield_id: ${env.ENABLE_TOGETHER:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_TOGETHER:=__disabled__}/${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-11B-Vision-Turbo}
-- shield_id: ${env.ENABLE_SAMBANOVA:=__disabled__}
-  provider_id: llama-guard
-  provider_shield_id: ${env.ENABLE_SAMBANOVA:=__disabled__}/${env.SAFETY_MODEL:=sambanova/Meta-Llama-Guard-3-8B}
+- shield_id: ${env.SAFETY_MODEL:=__disabled__}
+  provider_shield_id: ${env.ENABLE_OLLAMA:=__disabled__}/${env.SAFETY_MODEL:=__disabled__}
 vector_dbs: []
 datasets: []
 scoring_fns: []


@@ -12,7 +12,6 @@ from llama_stack.distribution.datatypes import (
     ModelInput,
     Provider,
     ProviderSpec,
-    ShieldInput,
     ToolGroupInput,
 )
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
@@ -32,75 +31,39 @@ from llama_stack.providers.registry.inference import available_providers
 from llama_stack.providers.remote.inference.anthropic.models import (
     MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.anthropic.models import (
-    SAFETY_MODELS_ENTRIES as ANTHROPIC_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.bedrock.models import (
     MODEL_ENTRIES as BEDROCK_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.bedrock.models import (
-    SAFETY_MODELS_ENTRIES as BEDROCK_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.cerebras.models import (
     MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.cerebras.models import (
-    SAFETY_MODELS_ENTRIES as CEREBRAS_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.databricks.databricks import (
     MODEL_ENTRIES as DATABRICKS_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.databricks.databricks import (
-    SAFETY_MODELS_ENTRIES as DATABRICKS_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.fireworks.models import (
     MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.fireworks.models import (
-    SAFETY_MODELS_ENTRIES as FIREWORKS_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.gemini.models import (
     MODEL_ENTRIES as GEMINI_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.gemini.models import (
-    SAFETY_MODELS_ENTRIES as GEMINI_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.groq.models import (
     MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.groq.models import (
-    SAFETY_MODELS_ENTRIES as GROQ_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.nvidia.models import (
     MODEL_ENTRIES as NVIDIA_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.nvidia.models import (
-    SAFETY_MODELS_ENTRIES as NVIDIA_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.openai.models import (
     MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.openai.models import (
-    SAFETY_MODELS_ENTRIES as OPENAI_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.runpod.runpod import (
     MODEL_ENTRIES as RUNPOD_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.runpod.runpod import (
-    SAFETY_MODELS_ENTRIES as RUNPOD_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.sambanova.models import (
     MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.sambanova.models import (
-    SAFETY_MODELS_ENTRIES as SAMBANOVA_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.inference.together.models import (
     MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
 )
-from llama_stack.providers.remote.inference.together.models import (
-    SAFETY_MODELS_ENTRIES as TOGETHER_SAFETY_MODELS_ENTRIES,
-)
 from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
@@ -111,6 +74,7 @@ from llama_stack.templates.template import (
     DistributionTemplate,
     RunConfigSettings,
     get_model_registry,
+    get_shield_registry,
 )
@@ -164,28 +128,13 @@ def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]:
 def _get_model_safety_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]:
     """Get model entries for a specific provider type."""
     safety_model_entries_map = {
-        "openai": OPENAI_SAFETY_MODELS_ENTRIES,
-        "fireworks": FIREWORKS_SAFETY_MODELS_ENTRIES,
-        "together": TOGETHER_SAFETY_MODELS_ENTRIES,
-        "anthropic": ANTHROPIC_SAFETY_MODELS_ENTRIES,
-        "gemini": GEMINI_SAFETY_MODELS_ENTRIES,
-        "groq": GROQ_SAFETY_MODELS_ENTRIES,
-        "sambanova": SAMBANOVA_SAFETY_MODELS_ENTRIES,
-        "cerebras": CEREBRAS_SAFETY_MODELS_ENTRIES,
-        "bedrock": BEDROCK_SAFETY_MODELS_ENTRIES,
-        "databricks": DATABRICKS_SAFETY_MODELS_ENTRIES,
-        "nvidia": NVIDIA_SAFETY_MODELS_ENTRIES,
-        "runpod": RUNPOD_SAFETY_MODELS_ENTRIES,
-    }
-
-    # Special handling for providers with dynamic model entries
-    if provider_type == "ollama":
-        return [
+        "ollama": [
             ProviderModelEntry(
-                provider_model_id="llama-guard3:1b",
+                provider_model_id="${env.SAFETY_MODEL:=__disabled__}",
                 model_type=ModelType.llm,
             ),
-        ]
+        ],
+    }
 
     return safety_model_entries_map.get(provider_type, [])
@@ -246,28 +195,20 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
 
 # build a list of shields for all possible providers
-def get_shields_for_providers(providers: list[Provider]) -> list[ShieldInput]:
-    shields = []
+def get_safety_models_for_providers(providers: list[Provider]) -> dict[str, list[ProviderModelEntry]]:
+    available_models = {}
     for provider in providers:
         provider_type = provider.provider_type.split("::")[1]
         safety_model_entries = _get_model_safety_entries_for_provider(provider_type)
         if len(safety_model_entries) == 0:
             continue
-        if provider.provider_id:
-            shield_id = provider.provider_id
-        else:
-            raise ValueError(f"Provider {provider.provider_type} has no provider_id")
-        for safety_model_entry in safety_model_entries:
-            print(f"provider.provider_id: {provider.provider_id}")
-            print(f"safety_model_entry.provider_model_id: {safety_model_entry.provider_model_id}")
-            shields.append(
-                ShieldInput(
-                    provider_id="llama-guard",
-                    shield_id=shield_id,
-                    provider_shield_id=f"{provider.provider_id}/${{env.SAFETY_MODEL:={safety_model_entry.provider_model_id}}}",
-                )
-            )
-    return shields
+
+        env_var = f"ENABLE_{provider_type.upper().replace('-', '_').replace('::', '_')}"
+        provider_id = f"${{env.{env_var}:=__disabled__}}"
+
+        available_models[provider_id] = safety_model_entries
+
+    return available_models
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -307,8 +248,6 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
-    shields = get_shields_for_providers(remote_inference_providers)
-
     providers = {
         "inference": ([p.provider_type for p in remote_inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ([p.provider_type for p in vector_io_providers]),
@@ -361,7 +300,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    default_models = get_model_registry(available_models)
+    default_models, ids_conflict_in_models = get_model_registry(available_models)
+
+    available_safety_models = get_safety_models_for_providers(remote_inference_providers)
+    shields = get_shield_registry(available_safety_models, ids_conflict_in_models)
+
     return DistributionTemplate(
         name=name,


@@ -37,7 +37,7 @@ from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as ge
 
 def get_model_registry(
     available_models: dict[str, list[ProviderModelEntry]],
-) -> list[ModelInput]:
+) -> tuple[list[ModelInput], bool]:
     models = []
 
     # check for conflicts in model ids
@@ -74,7 +74,50 @@ def get_model_registry(
                 metadata=entry.metadata,
             )
         )
-    return models
+    return models, ids_conflict
+
+
+def get_shield_registry(
+    available_safety_models: dict[str, list[ProviderModelEntry]],
+    ids_conflict_in_models: bool,
+) -> list[ShieldInput]:
+    shields = []
+
+    # check for conflicts in shield ids
+    all_ids = set()
+    ids_conflict = False
+
+    for _, entries in available_safety_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                if model_id in all_ids:
+                    ids_conflict = True
+                    rich.print(
+                        f"[yellow]Shield id {model_id} conflicts; all shield ids will be prefixed with provider id[/yellow]"
+                    )
+                    break
+            all_ids.update(ids)
+            if ids_conflict:
+                break
+        if ids_conflict:
+            break
+
+    for provider_id, entries in available_safety_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                identifier = f"{provider_id}/{model_id}" if ids_conflict and provider_id not in model_id else model_id
+                shields.append(
+                    ShieldInput(
+                        shield_id=identifier,
+                        provider_shield_id=f"{provider_id}/{entry.provider_model_id}"
+                        if ids_conflict_in_models
+                        else entry.provider_model_id,
+                    )
+                )
+    return shields
+
+
 class DefaultModel(BaseModel):


@@ -69,7 +69,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    default_models = get_model_registry(available_models)
+    default_models, _ = get_model_registry(available_models)
     return DistributionTemplate(
         name="watsonx",
         distro_type="remote_hosted",


@@ -77,6 +77,24 @@ def agent_config(llama_stack_client, text_model_id):
     return agent_config
 
 
+@pytest.fixture(scope="session")
+def agent_config_without_safety(text_model_id):
+    agent_config = dict(
+        model=text_model_id,
+        instructions="You are a helpful assistant",
+        sampling_params={
+            "strategy": {
+                "type": "top_p",
+                "temperature": 0.0001,
+                "top_p": 0.9,
+            },
+        },
+        tools=[],
+        enable_session_persistence=False,
+    )
+    return agent_config
+
+
 def test_agent_simple(llama_stack_client, agent_config):
     agent = Agent(llama_stack_client, **agent_config)
     session_id = agent.create_session(f"test-session-{uuid4()}")
@@ -491,7 +509,7 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name):
         assert expected_kw in response.output_message.content.lower()
 
 
-def test_rag_agent_with_attachments(llama_stack_client, agent_config):
+def test_rag_agent_with_attachments(llama_stack_client, agent_config_without_safety):
     urls = ["llama3.rst", "lora_finetune.rst"]
     documents = [
         # passign as url
@@ -514,14 +532,8 @@ def test_rag_agent_with_attachments(llama_stack_client, agent_config):
             metadata={},
         ),
     ]
-    rag_agent = Agent(llama_stack_client, **agent_config)
+    rag_agent = Agent(llama_stack_client, **agent_config_without_safety)
     session_id = rag_agent.create_session(f"test-session-{uuid4()}")
-    user_prompts = [
-        (
-            "Instead of the standard multi-head attention, what attention type does Llama3-8B use?",
-            "grouped",
-        ),
-    ]
     user_prompts = [
         (
             "I am attaching some documentation for Torchtune. Help me answer questions I will ask next.",
@@ -549,82 +561,6 @@ def test_rag_agent_with_attachments(llama_stack_client, agent_config):
     assert "lora" in response.output_message.content.lower()
 
 
-@pytest.mark.skip(reason="Code interpreter is currently disabled in the Stack")
-def test_rag_and_code_agent(llama_stack_client, agent_config):
-    if "llama-4" in agent_config["model"].lower():
-        pytest.xfail("Not working for llama4")
-
-    documents = []
-    documents.append(
-        Document(
-            document_id="nba_wiki",
-            content="The NBA was created on August 3, 1949, with the merger of the Basketball Association of America (BAA) and the National Basketball League (NBL).",
-            metadata={},
-        )
-    )
-    documents.append(
-        Document(
-            document_id="perplexity_wiki",
-            content="""Perplexity the company was founded in 2022 by Aravind Srinivas, Andy Konwinski, Denis Yarats and Johnny Ho, engineers with backgrounds in back-end systems, artificial intelligence (AI) and machine learning:
-
-    Srinivas, the CEO, worked at OpenAI as an AI researcher.
-    Konwinski was among the founding team at Databricks.
-    Yarats, the CTO, was an AI research scientist at Meta.
-    Ho, the CSO, worked as an engineer at Quora, then as a quantitative trader on Wall Street.[5]""",
-            metadata={},
-        )
-    )
-    vector_db_id = f"test-vector-db-{uuid4()}"
-    llama_stack_client.vector_dbs.register(
-        vector_db_id=vector_db_id,
-        embedding_model="all-MiniLM-L6-v2",
-        embedding_dimension=384,
-    )
-    llama_stack_client.tool_runtime.rag_tool.insert(
-        documents=documents,
-        vector_db_id=vector_db_id,
-        chunk_size_in_tokens=128,
-    )
-    agent_config = {
-        **agent_config,
-        "tools": [
-            dict(
-                name="builtin::rag/knowledge_search",
-                args={"vector_db_ids": [vector_db_id]},
-            ),
-            "builtin::code_interpreter",
-        ],
-    }
-    agent = Agent(llama_stack_client, **agent_config)
-    user_prompts = [
-        (
-            "when was Perplexity the company founded?",
-            [],
-            "knowledge_search",
-            "2022",
-        ),
-        (
-            "when was the nba created?",
-            [],
-            "knowledge_search",
-            "1949",
-        ),
-    ]
-
-    for prompt, docs, tool_name, expected_kw in user_prompts:
-        session_id = agent.create_session(f"test-session-{uuid4()}")
-        response = agent.create_turn(
-            messages=[{"role": "user", "content": prompt}],
-            session_id=session_id,
-            documents=docs,
-            stream=False,
-        )
-        tool_execution_step = next(step for step in response.steps if step.step_type == "tool_execution")
-        assert tool_execution_step.tool_calls[0].tool_name == tool_name, f"Failed on {prompt}"
-        if expected_kw:
-            assert expected_kw in response.output_message.content.lower()
-
-
 @pytest.mark.parametrize(
     "client_tools",
     [(get_boiling_point, False), (get_boiling_point_with_metadata, True)],