forked from phoenix-oss/llama-stack-mirror
Kill "remote" providers and fix testing with a remote stack properly (#435)
# What does this PR do? This PR kills the notion of "pure passthrough" remote providers. You cannot specify a single provider you must specify a whole distribution (stack) as remote. This PR also significantly fixes / upgrades testing infrastructure so you can now test against a remotely hosted stack server by just doing ```bash pytest -s -v -m remote test_agents.py \ --inference-model=Llama3.1-8B-Instruct --safety-shield=Llama-Guard-3-1B \ --env REMOTE_STACK_URL=http://localhost:5001 ``` Also fixed `test_agents_persistence.py` (which was broken) and killed some deprecated testing functions. ## Test Plan All the tests.
This commit is contained in:
parent
59a65e34d3
commit
12947ac19e
28 changed files with 406 additions and 519 deletions
|
@ -16,7 +16,7 @@ from llama_stack.providers.inline.safety.llama_guard import LlamaGuardConfig
|
|||
from llama_stack.providers.inline.safety.prompt_guard import PromptGuardConfig
|
||||
from llama_stack.providers.remote.safety.bedrock import BedrockSafetyConfig
|
||||
|
||||
from llama_stack.providers.tests.resolver import resolve_impls_for_test_v2
|
||||
from llama_stack.providers.tests.resolver import construct_stack_for_test
|
||||
|
||||
from ..conftest import ProviderFixture, remote_stack_fixture
|
||||
from ..env import get_env_or_fail
|
||||
|
@ -27,19 +27,38 @@ def safety_remote() -> ProviderFixture:
|
|||
return remote_stack_fixture()
|
||||
|
||||
|
||||
def safety_model_from_shield(shield_id):
|
||||
if shield_id in ("Bedrock", "CodeScanner", "CodeShield"):
|
||||
return None
|
||||
|
||||
return shield_id
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def safety_model(request):
|
||||
def safety_shield(request):
|
||||
if hasattr(request, "param"):
|
||||
return request.param
|
||||
return request.config.getoption("--safety-model", None)
|
||||
shield_id = request.param
|
||||
else:
|
||||
shield_id = request.config.getoption("--safety-shield", None)
|
||||
|
||||
if shield_id == "bedrock":
|
||||
shield_id = get_env_or_fail("BEDROCK_GUARDRAIL_IDENTIFIER")
|
||||
params = {"guardrailVersion": get_env_or_fail("BEDROCK_GUARDRAIL_VERSION")}
|
||||
else:
|
||||
params = {}
|
||||
|
||||
return ShieldInput(
|
||||
shield_id=shield_id,
|
||||
params=params,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def safety_llama_guard(safety_model) -> ProviderFixture:
|
||||
def safety_llama_guard() -> ProviderFixture:
|
||||
return ProviderFixture(
|
||||
providers=[
|
||||
Provider(
|
||||
provider_id="inline::llama-guard",
|
||||
provider_id="llama-guard",
|
||||
provider_type="inline::llama-guard",
|
||||
config=LlamaGuardConfig().model_dump(),
|
||||
)
|
||||
|
@ -55,7 +74,7 @@ def safety_prompt_guard() -> ProviderFixture:
|
|||
return ProviderFixture(
|
||||
providers=[
|
||||
Provider(
|
||||
provider_id="inline::prompt-guard",
|
||||
provider_id="prompt-guard",
|
||||
provider_type="inline::prompt-guard",
|
||||
config=PromptGuardConfig().model_dump(),
|
||||
)
|
||||
|
@ -80,50 +99,25 @@ SAFETY_FIXTURES = ["llama_guard", "bedrock", "remote"]
|
|||
|
||||
|
||||
@pytest_asyncio.fixture(scope="session")
|
||||
async def safety_stack(inference_model, safety_model, request):
|
||||
async def safety_stack(inference_model, safety_shield, request):
|
||||
# We need an inference + safety fixture to test safety
|
||||
fixture_dict = request.param
|
||||
inference_fixture = request.getfixturevalue(
|
||||
f"inference_{fixture_dict['inference']}"
|
||||
)
|
||||
safety_fixture = request.getfixturevalue(f"safety_{fixture_dict['safety']}")
|
||||
|
||||
providers = {
|
||||
"inference": inference_fixture.providers,
|
||||
"safety": safety_fixture.providers,
|
||||
}
|
||||
providers = {}
|
||||
provider_data = {}
|
||||
if inference_fixture.provider_data:
|
||||
provider_data.update(inference_fixture.provider_data)
|
||||
if safety_fixture.provider_data:
|
||||
provider_data.update(safety_fixture.provider_data)
|
||||
for key in ["inference", "safety"]:
|
||||
fixture = request.getfixturevalue(f"{key}_{fixture_dict[key]}")
|
||||
providers[key] = fixture.providers
|
||||
if fixture.provider_data:
|
||||
provider_data.update(fixture.provider_data)
|
||||
|
||||
shield_provider_type = safety_fixture.providers[0].provider_type
|
||||
shield_input = get_shield_to_register(shield_provider_type, safety_model)
|
||||
|
||||
print(f"inference_model: {inference_model}")
|
||||
print(f"shield_input = {shield_input}")
|
||||
impls = await resolve_impls_for_test_v2(
|
||||
test_stack = await construct_stack_for_test(
|
||||
[Api.safety, Api.shields, Api.inference],
|
||||
providers,
|
||||
provider_data,
|
||||
models=[ModelInput(model_id=inference_model)],
|
||||
shields=[shield_input],
|
||||
shields=[safety_shield],
|
||||
)
|
||||
|
||||
shield = await impls[Api.shields].get_shield(shield_input.shield_id)
|
||||
return impls[Api.safety], impls[Api.shields], shield
|
||||
|
||||
|
||||
def get_shield_to_register(provider_type: str, safety_model: str) -> ShieldInput:
|
||||
if provider_type == "remote::bedrock":
|
||||
identifier = get_env_or_fail("BEDROCK_GUARDRAIL_IDENTIFIER")
|
||||
params = {"guardrailVersion": get_env_or_fail("BEDROCK_GUARDRAIL_VERSION")}
|
||||
else:
|
||||
params = {}
|
||||
identifier = safety_model
|
||||
|
||||
return ShieldInput(
|
||||
shield_id=identifier,
|
||||
params=params,
|
||||
)
|
||||
shield = await test_stack.impls[Api.shields].get_shield(safety_shield.shield_id)
|
||||
return test_stack.impls[Api.safety], test_stack.impls[Api.shields], shield
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue