mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-15 09:36:10 +00:00
ci: test safety with starter (#2628)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 39s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 50s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, safety) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 1m10s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, safety) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, inference) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.12, datasets) (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, agents) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, inspect) (push) Failing after 10s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1m30s
Integration Tests / test-matrix (server, 3.12, safety) (push) Failing after 13s
Integration Tests / test-matrix (server, 3.12, providers) (push) Failing after 15s
Integration Tests / test-matrix (server, 3.12, scoring) (push) Failing after 13s
Integration Tests / test-matrix (server, 3.13, agents) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.12, vector_io) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.13, datasets) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.13, inference) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, post_training) (push) Failing after 25s
Integration Tests / test-matrix (server, 3.13, inspect) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, providers) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.13, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.13, scoring) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, safety) (push) Failing after 25s
Integration Tests / test-matrix (server, 3.13, post_training) (push) Failing after 27s
Integration Tests / test-matrix (server, 3.13, tool_runtime) (push) Failing after 23s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 9s
Test Llama Stack Build / generate-matrix (push) Successful in 14s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 16s
Test Llama Stack Build / build-single-provider (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, tool_runtime) (push) Failing after 1m7s
Update ReadTheDocs / update-readthedocs (push) Failing after 12s
Unit Tests / unit-tests (3.13) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 29s
Test External Providers / test-external-providers (venv) (push) Failing after 17s
Test Llama Stack Build / build (push) Failing after 13s
Unit Tests / unit-tests (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 35s
Python Package Build Test / build (3.12) (push) Failing after 31s
Python Package Build Test / build (3.13) (push) Failing after 29s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 34s
Pre-commit / pre-commit (push) Successful in 1m24s
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 39s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 50s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, safety) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 1m10s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, safety) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, inference) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.12, datasets) (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, agents) (push) Failing after 17s
Integration Tests / test-matrix (server, 3.12, inspect) (push) Failing after 10s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 1m30s
Integration Tests / test-matrix (server, 3.12, safety) (push) Failing after 13s
Integration Tests / test-matrix (server, 3.12, providers) (push) Failing after 15s
Integration Tests / test-matrix (server, 3.12, scoring) (push) Failing after 13s
Integration Tests / test-matrix (server, 3.13, agents) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.12, vector_io) (push) Failing after 12s
Integration Tests / test-matrix (server, 3.13, datasets) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.13, inference) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.12, post_training) (push) Failing after 25s
Integration Tests / test-matrix (server, 3.13, inspect) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, providers) (push) Failing after 11s
Integration Tests / test-matrix (server, 3.13, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (server, 3.13, scoring) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 13s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 10s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 7s
Integration Tests / test-matrix (server, 3.13, safety) (push) Failing after 25s
Integration Tests / test-matrix (server, 3.13, post_training) (push) Failing after 27s
Integration Tests / test-matrix (server, 3.13, tool_runtime) (push) Failing after 23s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 9s
Test Llama Stack Build / generate-matrix (push) Successful in 14s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 16s
Test Llama Stack Build / build-single-provider (push) Failing after 14s
Integration Tests / test-matrix (server, 3.12, tool_runtime) (push) Failing after 1m7s
Update ReadTheDocs / update-readthedocs (push) Failing after 12s
Unit Tests / unit-tests (3.13) (push) Failing after 14s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 29s
Test External Providers / test-external-providers (venv) (push) Failing after 17s
Test Llama Stack Build / build (push) Failing after 13s
Unit Tests / unit-tests (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 35s
Python Package Build Test / build (3.12) (push) Failing after 31s
Python Package Build Test / build (3.13) (push) Failing after 29s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 34s
Pre-commit / pre-commit (push) Successful in 1m24s
# What does this PR do? We are now testing the safety capability with the starter image. This includes a few changes: * Enable the safety integration test * Relax the shield model requirements from llama-guard to make it work with llama-guard3:8b coming from Ollama * Expose a shield for each inference provider in the starter distro. The shield will only be registered if the provider is enabled. Closes: https://github.com/meta-llama/llama-stack/issues/2528 Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
parent
de01eefdef
commit
9b7eecebcf
20 changed files with 621 additions and 126 deletions
|
@ -12,6 +12,7 @@ from llama_stack.distribution.datatypes import (
|
|||
ModelInput,
|
||||
Provider,
|
||||
ProviderSpec,
|
||||
ShieldInput,
|
||||
ToolGroupInput,
|
||||
)
|
||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||
|
@ -31,24 +32,75 @@ from llama_stack.providers.registry.inference import available_providers
|
|||
from llama_stack.providers.remote.inference.anthropic.models import (
|
||||
MODEL_ENTRIES as ANTHROPIC_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.anthropic.models import (
|
||||
SAFETY_MODELS_ENTRIES as ANTHROPIC_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.bedrock.models import (
|
||||
MODEL_ENTRIES as BEDROCK_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.bedrock.models import (
|
||||
SAFETY_MODELS_ENTRIES as BEDROCK_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.cerebras.models import (
|
||||
MODEL_ENTRIES as CEREBRAS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.cerebras.models import (
|
||||
SAFETY_MODELS_ENTRIES as CEREBRAS_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.databricks.databricks import (
|
||||
MODEL_ENTRIES as DATABRICKS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.databricks.databricks import (
|
||||
SAFETY_MODELS_ENTRIES as DATABRICKS_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks.models import (
|
||||
MODEL_ENTRIES as FIREWORKS_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.fireworks.models import (
|
||||
SAFETY_MODELS_ENTRIES as FIREWORKS_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.gemini.models import (
|
||||
MODEL_ENTRIES as GEMINI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.gemini.models import (
|
||||
SAFETY_MODELS_ENTRIES as GEMINI_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.groq.models import (
|
||||
MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.groq.models import (
|
||||
SAFETY_MODELS_ENTRIES as GROQ_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.nvidia.models import (
|
||||
MODEL_ENTRIES as NVIDIA_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.nvidia.models import (
|
||||
SAFETY_MODELS_ENTRIES as NVIDIA_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.openai.models import (
|
||||
MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.openai.models import (
|
||||
SAFETY_MODELS_ENTRIES as OPENAI_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.runpod.runpod import (
|
||||
MODEL_ENTRIES as RUNPOD_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.runpod.runpod import (
|
||||
SAFETY_MODELS_ENTRIES as RUNPOD_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.sambanova.models import (
|
||||
MODEL_ENTRIES as SAMBANOVA_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.sambanova.models import (
|
||||
SAFETY_MODELS_ENTRIES as SAMBANOVA_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.together.models import (
|
||||
MODEL_ENTRIES as TOGETHER_MODEL_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.inference.together.models import (
|
||||
SAFETY_MODELS_ENTRIES as TOGETHER_SAFETY_MODELS_ENTRIES,
|
||||
)
|
||||
from llama_stack.providers.remote.vector_io.chroma.config import ChromaVectorIOConfig
|
||||
from llama_stack.providers.remote.vector_io.pgvector.config import (
|
||||
PGVectorVectorIOConfig,
|
||||
|
@ -72,6 +124,11 @@ def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEnt
|
|||
"gemini": GEMINI_MODEL_ENTRIES,
|
||||
"groq": GROQ_MODEL_ENTRIES,
|
||||
"sambanova": SAMBANOVA_MODEL_ENTRIES,
|
||||
"cerebras": CEREBRAS_MODEL_ENTRIES,
|
||||
"bedrock": BEDROCK_MODEL_ENTRIES,
|
||||
"databricks": DATABRICKS_MODEL_ENTRIES,
|
||||
"nvidia": NVIDIA_MODEL_ENTRIES,
|
||||
"runpod": RUNPOD_MODEL_ENTRIES,
|
||||
}
|
||||
|
||||
# Special handling for providers with dynamic model entries
|
||||
|
@ -81,6 +138,10 @@ def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEnt
|
|||
provider_model_id="${env.OLLAMA_INFERENCE_MODEL:=__disabled__}",
|
||||
model_type=ModelType.llm,
|
||||
),
|
||||
ProviderModelEntry(
|
||||
provider_model_id="${env.SAFETY_MODEL:=__disabled__}",
|
||||
model_type=ModelType.llm,
|
||||
),
|
||||
ProviderModelEntry(
|
||||
provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:=__disabled__}",
|
||||
model_type=ModelType.embedding,
|
||||
|
@ -100,6 +161,35 @@ def _get_model_entries_for_provider(provider_type: str) -> list[ProviderModelEnt
|
|||
return model_entries_map.get(provider_type, [])
|
||||
|
||||
|
||||
def _get_model_safety_entries_for_provider(provider_type: str) -> list[ProviderModelEntry]:
|
||||
"""Get model entries for a specific provider type."""
|
||||
safety_model_entries_map = {
|
||||
"openai": OPENAI_SAFETY_MODELS_ENTRIES,
|
||||
"fireworks": FIREWORKS_SAFETY_MODELS_ENTRIES,
|
||||
"together": TOGETHER_SAFETY_MODELS_ENTRIES,
|
||||
"anthropic": ANTHROPIC_SAFETY_MODELS_ENTRIES,
|
||||
"gemini": GEMINI_SAFETY_MODELS_ENTRIES,
|
||||
"groq": GROQ_SAFETY_MODELS_ENTRIES,
|
||||
"sambanova": SAMBANOVA_SAFETY_MODELS_ENTRIES,
|
||||
"cerebras": CEREBRAS_SAFETY_MODELS_ENTRIES,
|
||||
"bedrock": BEDROCK_SAFETY_MODELS_ENTRIES,
|
||||
"databricks": DATABRICKS_SAFETY_MODELS_ENTRIES,
|
||||
"nvidia": NVIDIA_SAFETY_MODELS_ENTRIES,
|
||||
"runpod": RUNPOD_SAFETY_MODELS_ENTRIES,
|
||||
}
|
||||
|
||||
# Special handling for providers with dynamic model entries
|
||||
if provider_type == "ollama":
|
||||
return [
|
||||
ProviderModelEntry(
|
||||
provider_model_id="llama-guard3:1b",
|
||||
model_type=ModelType.llm,
|
||||
),
|
||||
]
|
||||
|
||||
return safety_model_entries_map.get(provider_type, [])
|
||||
|
||||
|
||||
def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
|
||||
"""Get configuration for a provider using its adapter's config class."""
|
||||
config_class = instantiate_class_type(provider_spec.config_class)
|
||||
|
@ -155,6 +245,31 @@ def get_remote_inference_providers() -> tuple[list[Provider], dict[str, list[Pro
|
|||
return inference_providers, available_models
|
||||
|
||||
|
||||
# build a list of shields for all possible providers
|
||||
def get_shields_for_providers(providers: list[Provider]) -> list[ShieldInput]:
|
||||
shields = []
|
||||
for provider in providers:
|
||||
provider_type = provider.provider_type.split("::")[1]
|
||||
safety_model_entries = _get_model_safety_entries_for_provider(provider_type)
|
||||
if len(safety_model_entries) == 0:
|
||||
continue
|
||||
if provider.provider_id:
|
||||
shield_id = provider.provider_id
|
||||
else:
|
||||
raise ValueError(f"Provider {provider.provider_type} has no provider_id")
|
||||
for safety_model_entry in safety_model_entries:
|
||||
print(f"provider.provider_id: {provider.provider_id}")
|
||||
print(f"safety_model_entry.provider_model_id: {safety_model_entry.provider_model_id}")
|
||||
shields.append(
|
||||
ShieldInput(
|
||||
provider_id="llama-guard",
|
||||
shield_id=shield_id,
|
||||
provider_shield_id=f"{provider.provider_id}/${{env.SAFETY_MODEL:={safety_model_entry.provider_model_id}}}",
|
||||
)
|
||||
)
|
||||
return shields
|
||||
|
||||
|
||||
def get_distribution_template() -> DistributionTemplate:
|
||||
remote_inference_providers, available_models = get_remote_inference_providers()
|
||||
|
||||
|
@ -192,6 +307,8 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
),
|
||||
]
|
||||
|
||||
shields = get_shields_for_providers(remote_inference_providers)
|
||||
|
||||
providers = {
|
||||
"inference": ([p.provider_type for p in remote_inference_providers] + ["inline::sentence-transformers"]),
|
||||
"vector_io": ([p.provider_type for p in vector_io_providers]),
|
||||
|
@ -266,9 +383,7 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
default_models=default_models + [embedding_model],
|
||||
default_tool_groups=default_tool_groups,
|
||||
# TODO: add a way to enable/disable shields on the fly
|
||||
# default_shields=[
|
||||
# ShieldInput(provider_id="llama-guard", shield_id="${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-8B}")
|
||||
# ],
|
||||
default_shields=shields,
|
||||
),
|
||||
},
|
||||
run_config_env_vars={
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue