# What does this PR do?

This PR adds support for NVIDIA's NeMo Evaluator API to the Llama Stack eval module. The integration enables users to evaluate models via the Llama Stack interface.

## Test Plan

1. Added unit tests and ran them successfully from the root of the project: `./scripts/unit-tests.sh tests/unit/providers/nvidia/test_eval.py`
   ```
   tests/unit/providers/nvidia/test_eval.py::TestNVIDIAEvalImpl::test_job_cancel PASSED
   tests/unit/providers/nvidia/test_eval.py::TestNVIDIAEvalImpl::test_job_result PASSED
   tests/unit/providers/nvidia/test_eval.py::TestNVIDIAEvalImpl::test_job_status PASSED
   tests/unit/providers/nvidia/test_eval.py::TestNVIDIAEvalImpl::test_register_benchmark PASSED
   tests/unit/providers/nvidia/test_eval.py::TestNVIDIAEvalImpl::test_run_eval PASSED
   ```
2. Verified the Llama Stack image builds: `LLAMA_STACK_DIR=$(pwd) llama stack build --template nvidia --image-type venv`

Documentation added to `llama_stack/providers/remote/eval/nvidia/README.md`

---------

Co-authored-by: Jash Gulabrai <jgulabrai@nvidia.com>
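For orientation, below is a minimal, hypothetical sketch of how the new `remote::nvidia` eval provider might be exercised through the `llama-stack-client` Python SDK once a stack built from the `nvidia` template is running. The base URL, benchmark and dataset IDs, model name, and config values are placeholders, and the exact client method signatures may vary between client versions; the provider README added in this PR is the authoritative reference.

```python
# Hypothetical usage sketch (not part of this PR). IDs, URLs, and config values
# are placeholders; method names/signatures are assumed from the llama-stack-client
# SDK and may differ in your installed version.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local stack endpoint

# Register a benchmark backed by a previously registered dataset (placeholder IDs).
client.benchmarks.register(
    benchmark_id="my-benchmark",
    dataset_id="my-dataset",
    scoring_functions=["basic::equality"],
)

# Start an evaluation job; the remote::nvidia provider forwards it to the NeMo Evaluator service.
job = client.eval.run_eval(
    benchmark_id="my-benchmark",
    benchmark_config={
        "eval_candidate": {
            "type": "model",
            "model": "meta/llama-3.1-8b-instruct",
            "sampling_params": {"max_tokens": 100},
        },
    },
)

# Poll the job and fetch results when it finishes.
status = client.eval.jobs.status(job_id=job.job_id, benchmark_id="my-benchmark")
results = client.eval.jobs.retrieve(job_id=job.job_id, benchmark_id="my-benchmark")
```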
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from pathlib import Path

from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::nvidia"],
        "vector_io": ["inline::faiss"],
        "safety": ["remote::nvidia"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": ["remote::nvidia"],
        "post_training": ["remote::nvidia"],
        "datasetio": ["inline::localfs"],
        "scoring": ["inline::basic"],
        "tool_runtime": ["inline::rag-runtime"],
    }

    inference_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIAConfig.sample_run_config(),
    )
    safety_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIASafetyConfig.sample_run_config(),
    )
    eval_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIAEvalConfig.sample_run_config(),
    )
    inference_model = ModelInput(
        model_id="${env.INFERENCE_MODEL}",
        provider_id="nvidia",
    )
    safety_model = ModelInput(
        model_id="${env.SAFETY_MODEL}",
        provider_id="nvidia",
    )

    available_models = {
        "nvidia": MODEL_ENTRIES,
    }
    default_tool_groups = [
        ToolGroupInput(
            toolgroup_id="builtin::rag",
            provider_id="rag-runtime",
        ),
    ]

    default_models = get_model_registry(available_models)
    return DistributionTemplate(
        name="nvidia",
        distro_type="self_hosted",
        description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        available_models_by_provider=available_models,
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                    "eval": [eval_provider],
                },
                default_models=default_models,
                default_tool_groups=default_tool_groups,
            ),
            "run-with-safety.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [
                        inference_provider,
                        safety_provider,
                    ],
                    "eval": [eval_provider],
                },
                default_models=[inference_model, safety_model],
                default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id="nvidia")],
                default_tool_groups=default_tool_groups,
            ),
        },
        run_config_env_vars={
            "NVIDIA_API_KEY": (
                "",
                "NVIDIA API Key",
            ),
            ## Nemo Customizer related variables
            "NVIDIA_USER_ID": (
                "llama-stack-user",
                "NVIDIA User ID",
            ),
            "NVIDIA_DATASET_NAMESPACE": (
                "default",
                "NVIDIA Dataset Namespace",
            ),
            "NVIDIA_ACCESS_POLICIES": (
                "{}",
                "NVIDIA Access Policies",
            ),
            "NVIDIA_PROJECT_ID": (
                "test-project",
                "NVIDIA Project ID",
            ),
            "NVIDIA_CUSTOMIZER_URL": (
                "https://customizer.api.nvidia.com",
                "NVIDIA Customizer URL",
            ),
            "NVIDIA_OUTPUT_MODEL_DIR": (
                "test-example-model@v1",
                "NVIDIA Output Model Directory",
            ),
            "GUARDRAILS_SERVICE_URL": (
                "http://0.0.0.0:7331",
                "URL for the NeMo Guardrails Service",
            ),
            "NVIDIA_EVALUATOR_URL": (
                "http://0.0.0.0:7331",
                "URL for the NeMo Evaluator Service",
            ),
            "INFERENCE_MODEL": (
                "Llama3.1-8B-Instruct",
                "Inference model",
            ),
            "SAFETY_MODEL": (
                "meta/llama-3.1-8b-instruct",
                "Name of the model to use for safety",
            ),
        },
    )
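Not part of the PR itself, but a quick way to see what the template above declares: assuming this file lives at `llama_stack/templates/nvidia/nvidia.py` and that `DistributionTemplate` exposes its constructor fields as attributes (as a pydantic model or dataclass would), the configured providers and environment variables can be listed with a small sketch like this.

```python
# Hypothetical inspection snippet: print the providers and environment variables
# the "nvidia" distribution template declares. The import path and attribute
# access are assumptions based on the file above.
from llama_stack.templates.nvidia.nvidia import get_distribution_template

template = get_distribution_template()
print(template.name)  # "nvidia"

# APIs and the provider types backing them, e.g. "eval: ['remote::nvidia']".
for api, provider_types in template.providers.items():
    print(f"{api}: {provider_types}")

# Environment variables surfaced in the generated run configs, with defaults.
for var, (default, description) in template.run_config_env_vars.items():
    print(f"{var} (default={default!r}): {description}")
```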