mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-15 17:44:01 +00:00
- Fireworks and Together do not support the Llama Guard 3 8B model anymore.
- Need to default to ollama.
- The current safety shields logic was not correct, since the shield_id was the provider (which had duplicates).
- Followed similar logic to models.

Note: This seems a bit over-engineered, but it can now be extended to other providers and fits in the overall mechanism of how env_vars are used to manage starter.

### How to test

```
ENABLE_OLLAMA=ollama ENABLE_FIREWORKS=fireworks SAFETY_MODEL=llama-guard3:1b pytest -s -v tests/integration/ --stack-config starter -k 'not(supervised_fine_tune or builtin_tool_code or safety_with_image or code_interpreter_for or rag_and_code or truncation or register_and_unregister)' --text-model fireworks/meta-llama/Llama-3.3-70B-Instruct --vision-model fireworks/meta-llama/Llama-4-Scout-17B-16E-Instruct --safety-shield llama-guard3:1b --embedding-model all-MiniLM-L6-v2
```

### Related but not obvious in this PR

In the llama-stack-ops repo, we run tests before publishing packages and docker containers. The actions in that repo were using the fireworks / together distros (which are non-existent), so we need to update them to run with `starter` and use `ollama` specifically for safety.
150 lines
5.2 KiB
Python
150 lines
5.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from pathlib import Path
|
|
|
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
|
|
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
|
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
|
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
|
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
|
|
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
|
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
|
|
|
|
|
def get_distribution_template() -> DistributionTemplate:
    """Build the distribution template for the NVIDIA distribution.

    The template declares which provider backs each API, the models taken
    from the NVIDIA inference provider's ``MODEL_ENTRIES``, and two run
    configurations:

    * ``run.yaml`` -- inference, datasetio and eval overridden with the
      NVIDIA providers, using the registry-derived default models.
    * ``run-with-safety.yaml`` -- inference, safety and eval overridden with
      the NVIDIA providers, with the inference and safety models (and the
      matching shield) taken from the ``INFERENCE_MODEL`` / ``SAFETY_MODEL``
      environment placeholders.

    Returns:
        The ``DistributionTemplate`` named ``"nvidia"``.
    """
    # API name -> provider types that make up this distribution.
    providers = {
        "inference": ["remote::nvidia"],
        "vector_io": ["inline::faiss"],
        "safety": ["remote::nvidia"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["remote::nvidia"],
        "post_training": ["remote::nvidia"],
        "datasetio": ["inline::localfs", "remote::nvidia"],
        "scoring": ["inline::basic"],
        "tool_runtime": ["inline::rag-runtime"],
    }

    # All NVIDIA-backed providers share the same id/type; only the config
    # class differs per API.
    inference_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIAConfig.sample_run_config(),
    )
    safety_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIASafetyConfig.sample_run_config(),
    )
    datasetio_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NvidiaDatasetIOConfig.sample_run_config(),
    )
    eval_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIAEvalConfig.sample_run_config(),
    )

    # Models for the safety-enabled run config; the ids are environment
    # placeholders rather than concrete model names.
    inference_model = ModelInput(
        model_id="${env.INFERENCE_MODEL}",
        provider_id="nvidia",
    )
    safety_model = ModelInput(
        model_id="${env.SAFETY_MODEL}",
        provider_id="nvidia",
    )

    available_models = {
        "nvidia": MODEL_ENTRIES,
    }
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
    ]

    # Only the first element of the registry result is needed here.
    default_models, _ = get_model_registry(available_models)

    return DistributionTemplate(
        name="nvidia",
        distro_type="self_hosted",
        description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "datasetio": [datasetio_provider],
                    "eval": [eval_provider],
                },
                default_models=default_models,
                default_tool_groups=default_tool_groups,
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    # The safety provider carries NVIDIASafetyConfig and has
                    # the same provider_id as the inference provider, so it
                    # belongs under the "safety" API rather than being listed
                    # as a second "nvidia" entry under "inference".
                    "safety": [safety_provider],
                    "eval": [eval_provider],
                },
                default_models=[inference_model, safety_model],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
                default_tool_groups=default_tool_groups,
            ),
        },
        # Environment variable name -> (value, description) pair; the first
        # element is presumably the default value -- confirm against
        # DistributionTemplate.
        run_config_env_vars={
            "NVIDIA_API_KEY": (
                "",
                "NVIDIA API Key",
            ),
            "NVIDIA_APPEND_API_VERSION": (
                "True",
                "Whether to append the API version to the base_url",
            ),
            # NeMo Customizer related variables
            "NVIDIA_DATASET_NAMESPACE": (
                "default",
                "NVIDIA Dataset Namespace",
            ),
            "NVIDIA_PROJECT_ID": (
                "test-project",
                "NVIDIA Project ID",
            ),
            "NVIDIA_CUSTOMIZER_URL": (
                "https://customizer.api.nvidia.com",
                "NVIDIA Customizer URL",
            ),
            "NVIDIA_OUTPUT_MODEL_DIR": (
                "test-example-model@v1",
                "NVIDIA Output Model Directory",
            ),
            "GUARDRAILS_SERVICE_URL": (
                "http://0.0.0.0:7331",
                "URL for the NeMo Guardrails Service",
            ),
            "NVIDIA_GUARDRAILS_CONFIG_ID": (
                "self-check",
                "NVIDIA Guardrail Configuration ID",
            ),
            "NVIDIA_EVALUATOR_URL": (
                "http://0.0.0.0:7331",
                "URL for the NeMo Evaluator Service",
            ),
            "INFERENCE_MODEL": (
                "Llama3.1-8B-Instruct",
                "Inference model",
            ),
            "SAFETY_MODEL": (
                "meta/llama-3.1-8b-instruct",
                "Name of the model to use for safety",
            ),
        },
    )
|