mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-16 01:53:10 +00:00
Kill "remote" providers and fix testing with a remote stack properly (#435)
# What does this PR do? This PR kills the notion of "pure passthrough" remote providers. You cannot specify a single provider you must specify a whole distribution (stack) as remote. This PR also significantly fixes / upgrades testing infrastructure so you can now test against a remotely hosted stack server by just doing ```bash pytest -s -v -m remote test_agents.py \ --inference-model=Llama3.1-8B-Instruct --safety-shield=Llama-Guard-3-1B \ --env REMOTE_STACK_URL=http://localhost:5001 ``` Also fixed `test_agents_persistence.py` (which was broken) and killed some deprecated testing functions. ## Test Plan All the tests.
This commit is contained in:
parent
59a65e34d3
commit
12947ac19e
28 changed files with 406 additions and 519 deletions
|
@ -66,14 +66,14 @@ def pytest_configure(config):
|
|||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--safety-model",
|
||||
"--safety-shield",
|
||||
action="store",
|
||||
default=None,
|
||||
help="Specify the safety model to use for testing",
|
||||
help="Specify the safety shield to use for testing",
|
||||
)
|
||||
|
||||
|
||||
SAFETY_MODEL_PARAMS = [
|
||||
SAFETY_SHIELD_PARAMS = [
|
||||
pytest.param("Llama-Guard-3-1B", marks=pytest.mark.guard_1b, id="guard_1b"),
|
||||
]
|
||||
|
||||
|
@ -83,13 +83,13 @@ def pytest_generate_tests(metafunc):
|
|||
# But a user can also pass in a custom combination via the CLI by doing
|
||||
# `--providers inference=together,safety=meta_reference`
|
||||
|
||||
if "safety_model" in metafunc.fixturenames:
|
||||
model = metafunc.config.getoption("--safety-model")
|
||||
if model:
|
||||
params = [pytest.param(model, id="")]
|
||||
if "safety_shield" in metafunc.fixturenames:
|
||||
shield_id = metafunc.config.getoption("--safety-shield")
|
||||
if shield_id:
|
||||
params = [pytest.param(shield_id, id="")]
|
||||
else:
|
||||
params = SAFETY_MODEL_PARAMS
|
||||
for fixture in ["inference_model", "safety_model"]:
|
||||
params = SAFETY_SHIELD_PARAMS
|
||||
for fixture in ["inference_model", "safety_shield"]:
|
||||
metafunc.parametrize(
|
||||
fixture,
|
||||
params,
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.providers.inline.safety.llama_guard import LlamaGuardConfig
|
|||
from llama_stack.providers.inline.safety.prompt_guard import PromptGuardConfig
|
||||
from llama_stack.providers.remote.safety.bedrock import BedrockSafetyConfig
|
||||
|
||||
from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
|
||||
from llama_stack.providers.tests.resolver import construct_stack_for_test
|
||||
|
||||
from ..conftest import ProviderFixture, remote_stack_fixture
|
||||
from ..env import get_env_or_fail
|
||||
|
@ -27,19 +27,38 @@ def safety_remote() -> ProviderFixture:
|
|||
return remote_stack_fixture()
|
||||
|
||||
|
||||
def safety_model_from_shield(shield_id):
|
||||
if shield_id in ("Bedrock", "CodeScanner", "CodeShield"):
|
||||
return None
|
||||
|
||||
return shield_id
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def safety_model(request):
|
||||
def safety_shield(request):
|
||||
if hasattr(request, "param"):
|
||||
return request.param
|
||||
return request.config.getoption("--safety-model", None)
|
||||
shield_id = request.param
|
||||
else:
|
||||
shield_id = request.config.getoption("--safety-shield", None)
|
||||
|
||||
if shield_id == "bedrock":
|
||||
shield_id = get_env_or_fail("BEDROCK_GUARDRAIL_IDENTIFIER")
|
||||
params = {"guardrailVersion": get_env_or_fail("BEDROCK_GUARDRAIL_VERSION")}
|
||||
else:
|
||||
params = {}
|
||||
|
||||
return ShieldInput(
|
||||
shield_id=shield_id,
|
||||
params=params,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def safety_llama_guard(safety_model) -> ProviderFixture:
|
||||
def safety_llama_guard() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
providers=[
|
||||
Provider(
|
||||
provider_id="inline::llama-guard",
|
||||
provider_id="llama-guard",
|
||||
provider_type="inline::llama-guard",
|
||||
config=LlamaGuardConfig().model_dump(),
|
||||
)
|
||||
|
@ -55,7 +74,7 @@ def safety_prompt_guard() -> ProviderFixture:
|
|||
return ProviderFixture(
|
||||
providers=[
|
||||
Provider(
|
||||
provider_id="inline::prompt-guard",
|
||||
provider_id="prompt-guard",
|
||||
provider_type="inline::prompt-guard",
|
||||
config=PromptGuardConfig().model_dump(),
|
||||
)
|
||||
|
@ -80,50 +99,25 @@ SAFETY_FIXTURES = ["llama_guard", "bedrock", "remote"]
|
|||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def safety_stack(inference_model, safety_model, request):
|
||||
async def safety_stack(inference_model, safety_shield, request):
|
||||
# We need an inference + safety fixture to test safety
|
||||
fixture_dict = request.param
|
||||
inference_fixture = request.getfixturevalue(
|
||||
f"inference_{fixture_dict['inference']}"
|
||||
)
|
||||
safety_fixture = request.getfixturevalue(f"safety_{fixture_dict['safety']}")
|
||||
|
||||
providers = {
|
||||
"inference": inference_fixture.providers,
|
||||
"safety": safety_fixture.providers,
|
||||
}
|
||||
providers = {}
|
||||
provider_data = {}
|
||||
if inference_fixture.provider_data:
|
||||
provider_data.update(inference_fixture.provider_data)
|
||||
if safety_fixture.provider_data:
|
||||
provider_data.update(safety_fixture.provider_data)
|
||||
for key in ["inference", "safety"]:
|
||||
fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
|
||||
providers[key] = fixture.providers
|
||||
if fixture.provider_data:
|
||||
provider_data.update(fixture.provider_data)
|
||||
|
||||
shield_provider_type = safety_fixture.providers[0].provider_type
|
||||
shield_input = get_shield_to_register(shield_provider_type, safety_model)
|
||||
|
||||
print(f"inference_model: {inference_model}")
|
||||
print(f"shield_input = {shield_input}")
|
||||
impls = await resolve_impls_for_test_v2(
|
||||
test_stack = await construct_stack_for_test(
|
||||
[Api.safety, Api.shields, Api.inference],
|
||||
providers,
|
||||
provider_data,
|
||||
models=[ModelInput(model_id=inference_model)],
|
||||
shields=[shield_input],
|
||||
shields=[safety_shield],
|
||||
)
|
||||
|
||||
shield = await impls[Api.shields].get_shield(shield_input.shield_id)
|
||||
return impls[Api.safety], impls[Api.shields], shield
|
||||
|
||||
|
||||
def get_shield_to_register(provider_type: str, safety_model: str) -> ShieldInput:
|
||||
if provider_type == "remote::bedrock":
|
||||
identifier = get_env_or_fail("BEDROCK_GUARDRAIL_IDENTIFIER")
|
||||
params = {"guardrailVersion": get_env_or_fail("BEDROCK_GUARDRAIL_VERSION")}
|
||||
else:
|
||||
params = {}
|
||||
identifier = safety_model
|
||||
|
||||
return ShieldInput(
|
||||
shield_id=identifier,
|
||||
params=params,
|
||||
)
|
||||
shield = await test_stack.impls[Api.shields].get_shield(safety_shield.shield_id)
|
||||
return test_stack.impls[Api.safety], test_stack.impls[Api.shields], shield
|
||||
|
|
|
@ -18,13 +18,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403
|
|||
|
||||
|
||||
class TestSafety:
|
||||
@pytest.mark.asyncio
|
||||
async def test_new_shield(self, safety_stack):
|
||||
_, shields_impl, shield = safety_stack
|
||||
assert shield is not None
|
||||
assert shield.provider_resource_id == shield.identifier
|
||||
assert shield.provider_id is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_shield_list(self, safety_stack):
|
||||
_, shields_impl, _ = safety_stack
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue