mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
# What does this PR do?

This PR contains two sets of notebooks that serve as reference material for developers getting started with Llama Stack using the NVIDIA Provider. Developers should be able to execute these notebooks end-to-end, pointing to their NeMo Microservices deployment.

1. `beginner_e2e/`: Notebook that walks through a beginner end-to-end workflow that covers creating datasets, running inference, customizing and evaluating models, and running safety checks.
2. `tool_calling/`: Notebook that is ported over from the [Data Flywheel & Tool Calling notebook](https://github.com/NVIDIA/GenerativeAIExamples/tree/main/nemo/data-flywheel) that is referenced in the NeMo Microservices docs. I updated the notebook to use the Llama Stack client wherever possible, and added relevant instructions.

[//]: # (If resolving an issue, uncomment and update the line below)
[//]: # (Closes #[issue-number])

## Test Plan

- Both notebook folders contain READMEs with prerequisites. To manually test these notebooks, you'll need to have a deployment of the NeMo Microservices Platform and update the `config.py` file with your deployment's information.
- I've run through these notebooks manually end-to-end to verify each step works.

[//]: # (## Documentation)

---------

Co-authored-by: Jash Gulabrai <jgulabrai@nvidia.com>
150 lines
5.2 KiB
Python
150 lines
5.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from pathlib import Path
|
|
|
|
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput, ToolGroupInput
|
|
from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig
|
|
from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig
|
|
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
|
|
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
|
|
from llama_stack.providers.remote.safety.nvidia import NVIDIASafetyConfig
|
|
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
|
|
|
|
|
|
def get_distribution_template() -> DistributionTemplate:
    """Assemble the ``nvidia`` distribution template.

    The template declares which provider backs each API, registers the
    models listed in ``MODEL_ENTRIES`` under the ``nvidia`` provider, and
    defines two run configurations:

    * ``run.yaml`` -- overrides the inference, datasetio and eval providers
      and registers the full model registry.
    * ``run-with-safety.yaml`` -- overrides the inference and eval providers
      and registers only the models named by ``${env.INFERENCE_MODEL}`` and
      ``${env.SAFETY_MODEL}``, plus a shield for ``${env.SAFETY_MODEL}``.

    Returns:
        The populated ``DistributionTemplate``.
    """
    nvidia_id = "nvidia"
    nvidia_type = "remote::nvidia"

    def _remote_nvidia(config) -> Provider:
        # Every NVIDIA-backed provider here shares the same id and type;
        # only the config payload differs.
        return Provider(provider_id=nvidia_id, provider_type=nvidia_type, config=config)

    # Provider type(s) backing each API of the distribution.
    api_providers = {
        "inference": [nvidia_type],
        "vector_io": ["inline::faiss"],
        "safety": [nvidia_type],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
        "eval": [nvidia_type],
        "post_training": [nvidia_type],
        "datasetio": ["inline::localfs", nvidia_type],
        "scoring": ["inline::basic"],
        "tool_runtime": ["inline::rag-runtime"],
    }

    nim_inference = _remote_nvidia(NVIDIAConfig.sample_run_config())
    guardrails = _remote_nvidia(NVIDIASafetyConfig.sample_run_config())
    nemo_datasetio = _remote_nvidia(NvidiaDatasetIOConfig.sample_run_config())
    nemo_eval = _remote_nvidia(NVIDIAEvalConfig.sample_run_config())

    # Env-driven model registrations, used only by run-with-safety.yaml.
    env_inference_model = ModelInput(model_id="${env.INFERENCE_MODEL}", provider_id=nvidia_id)
    env_safety_model = ModelInput(model_id="${env.SAFETY_MODEL}", provider_id=nvidia_id)

    models_by_provider = {nvidia_id: MODEL_ENTRIES}
    registry_models = get_model_registry(models_by_provider)

    rag_tool_groups = [
        ToolGroupInput(toolgroup_id="builtin::rag", provider_id="rag-runtime"),
    ]

    base_run = RunConfigSettings(
        provider_overrides={
            "inference": [nim_inference],
            "datasetio": [nemo_datasetio],
            "eval": [nemo_eval],
        },
        default_models=registry_models,
        default_tool_groups=rag_tool_groups,
    )

    safety_run = RunConfigSettings(
        provider_overrides={
            # NOTE(review): the provider built from NVIDIASafetyConfig is listed
            # under "inference" rather than "safety", and shares provider_id
            # "nvidia" with the inference provider -- confirm this is intended.
            "inference": [nim_inference, guardrails],
            "eval": [nemo_eval],
        },
        default_models=[env_inference_model, env_safety_model],
        default_shields=[ShieldInput(shield_id="${env.SAFETY_MODEL}", provider_id=nvidia_id)],
        default_tool_groups=rag_tool_groups,
    )

    # Each entry maps an environment variable to (default value, description).
    env_vars = {
        "NVIDIA_API_KEY": ("", "NVIDIA API Key"),
        "NVIDIA_APPEND_API_VERSION": ("True", "Whether to append the API version to the base_url"),
        # NeMo Customizer related variables
        "NVIDIA_DATASET_NAMESPACE": ("default", "NVIDIA Dataset Namespace"),
        "NVIDIA_PROJECT_ID": ("test-project", "NVIDIA Project ID"),
        "NVIDIA_CUSTOMIZER_URL": ("https://customizer.api.nvidia.com", "NVIDIA Customizer URL"),
        "NVIDIA_OUTPUT_MODEL_DIR": ("test-example-model@v1", "NVIDIA Output Model Directory"),
        "GUARDRAILS_SERVICE_URL": ("http://0.0.0.0:7331", "URL for the NeMo Guardrails Service"),
        "NVIDIA_GUARDRAILS_CONFIG_ID": ("self-check", "NVIDIA Guardrail Configuration ID"),
        "NVIDIA_EVALUATOR_URL": ("http://0.0.0.0:7331", "URL for the NeMo Evaluator Service"),
        "INFERENCE_MODEL": ("Llama3.1-8B-Instruct", "Inference model"),
        "SAFETY_MODEL": ("meta/llama-3.1-8b-instruct", "Name of the model to use for safety"),
    }

    return DistributionTemplate(
        name="nvidia",
        distro_type="self_hosted",
        description="Use NVIDIA NIM for running LLM inference, evaluation and safety",
        container_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=api_providers,
        available_models_by_provider=models_by_provider,
        run_configs={
            "run.yaml": base_run,
            "run-with-safety.yaml": safety_run,
        },
        run_config_env_vars=env_vars,
    )
|