mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-29 19:34:19 +00:00
Split safety into (llama-guard, prompt-guard, code-scanner) (#400)
Splits the meta-reference safety implementation into three distinct providers: - inline::llama-guard - inline::prompt-guard - inline::code-scanner Note that this PR is a backward incompatible change to the llama stack server. I have added deprecation_error field to ProviderSpec -- the server reads it and immediately barfs. This is used to direct the user with a specific message on what action to perform. An automagical "config upgrade" is a bit too much work to implement right now :/ (Note that we will be gradually prefixing all inline providers with inline:: -- I am only doing this for this set of new providers because otherwise existing configuration files will break even more badly.)
This commit is contained in:
parent
6d38b1690b
commit
c1f7ba3aed
47 changed files with 464 additions and 500 deletions
|
@ -65,7 +65,6 @@ def inference_ollama(inference_model) -> ProviderFixture:
|
|||
inference_model = (
|
||||
[inference_model] if isinstance(inference_model, str) else inference_model
|
||||
)
|
||||
print("!!!", inference_model)
|
||||
if "Llama3.1-8B-Instruct" in inference_model:
|
||||
pytest.skip("Ollama only supports Llama3.2-3B-Instruct for testing")
|
||||
|
||||
|
@ -162,9 +161,11 @@ async def inference_stack(request, inference_model):
|
|||
inference_fixture.provider_data,
|
||||
)
|
||||
|
||||
provider_id = inference_fixture.providers[0].provider_id
|
||||
print(f"Registering model {inference_model} with provider {provider_id}")
|
||||
await impls[Api.models].register_model(
|
||||
model_id=inference_model,
|
||||
provider_model_id=inference_fixture.providers[0].provider_id,
|
||||
provider_id=provider_id,
|
||||
)
|
||||
|
||||
return (impls[Api.inference], impls[Api.models])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue