From a5d413045c5ab0141b91f08eb003b6c304cc8354 Mon Sep 17 00:00:00 2001
From: Chantal D Gama Rose
Date: Tue, 19 Nov 2024 21:02:20 +0000
Subject: [PATCH] Add nvidia remote distro

---
 .../remote_hosted_distro/nvidia.md            | 60 +++++++++++++++++
 .../remote/inference/nvidia/_config.py        |  9 ++-
 llama_stack/templates/nvidia/__init__.py      |  7 ++
 llama_stack/templates/nvidia/build.yaml       | 19 ++++++
 llama_stack/templates/nvidia/doc_template.md  | 60 +++++++++++++++++
 llama_stack/templates/nvidia/nvidia.py        | 64 +++++++++++++++++++
 llama_stack/templates/nvidia/run.yaml         | 55 ++++++++++++++++
 7 files changed, 273 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/getting_started/distributions/remote_hosted_distro/nvidia.md
 create mode 100644 llama_stack/templates/nvidia/__init__.py
 create mode 100644 llama_stack/templates/nvidia/build.yaml
 create mode 100644 llama_stack/templates/nvidia/doc_template.md
 create mode 100644 llama_stack/templates/nvidia/nvidia.py
 create mode 100644 llama_stack/templates/nvidia/run.yaml

diff --git a/docs/source/getting_started/distributions/remote_hosted_distro/nvidia.md b/docs/source/getting_started/distributions/remote_hosted_distro/nvidia.md
new file mode 100644
index 000000000..b670c7345
--- /dev/null
+++ b/docs/source/getting_started/distributions/remote_hosted_distro/nvidia.md
@@ -0,0 +1,60 @@
+# NVIDIA Distribution
+
+The `llamastack/distribution-nvidia` distribution consists of the following provider configurations.
+
+| API | Provider(s) |
+|-----|-------------|
+| agents | `inline::meta-reference` |
+| inference | `remote::nvidia` |
+| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
+| safety | `inline::llama-guard` |
+| telemetry | `inline::meta-reference` |
+
+
+### Environment Variables
+
+The following environment variables can be configured:
+
+- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
+
+### Models
+
+The following models are available by default:
+
+- `${env.INFERENCE_MODEL} (None)`
+
+
+### Prerequisite: API Keys
+
+Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
+
+
+## Running Llama Stack with NVIDIA
+
+You can do this either via Docker, which has a pre-built image, or via Conda, where you build the distribution code yourself.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ./run.yaml:/root/my-run.yaml \
+  llamastack/distribution-nvidia \
+  --yaml-config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+```
+
+### Via Conda
+
+```bash
+llama stack build --template nvidia --image-type conda
+llama stack run ./run.yaml \
+  --port 5001 \
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+```
\ No newline at end of file
diff --git a/llama_stack/providers/remote/inference/nvidia/_config.py b/llama_stack/providers/remote/inference/nvidia/_config.py
index 46ac3fa5b..fab636c8d 100644
--- a/llama_stack/providers/remote/inference/nvidia/_config.py
+++ b/llama_stack/providers/remote/inference/nvidia/_config.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import os
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field
@@ -50,3 +50,10 @@ class NVIDIAConfig(BaseModel):
     @property
     def is_hosted(self) -> bool:
         return "integrate.api.nvidia.com" in self.base_url
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "url": "https://integrate.api.nvidia.com",
+            "api_key": "${env.NVIDIA_API_KEY}",
+        }
diff --git a/llama_stack/templates/nvidia/__init__.py b/llama_stack/templates/nvidia/__init__.py
new file mode 100644
index 000000000..24e2fbd21
--- /dev/null
+++ b/llama_stack/templates/nvidia/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .nvidia import get_distribution_template  # noqa: F401
diff --git a/llama_stack/templates/nvidia/build.yaml b/llama_stack/templates/nvidia/build.yaml
new file mode 100644
index 000000000..9a735c220
--- /dev/null
+++ b/llama_stack/templates/nvidia/build.yaml
@@ -0,0 +1,19 @@
+version: '2'
+name: nvidia
+distribution_spec:
+  description: Use NVIDIA NIM for running LLM inference
+  docker_image: null
+  providers:
+    inference:
+    - remote::nvidia
+    memory:
+    - inline::faiss
+    - remote::chromadb
+    - remote::pgvector
+    safety:
+    - inline::llama-guard
+    agents:
+    - inline::meta-reference
+    telemetry:
+    - inline::meta-reference
+image_type: conda
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
new file mode 100644
index 000000000..a9db77055
--- /dev/null
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -0,0 +1,60 @@
+# NVIDIA Distribution
+
+The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
+
+{{ providers_table }}
+
+{% if run_config_env_vars %}
+### Environment Variables
+
+The following environment variables can be configured:
+
+{% for var, (default_value, description) in run_config_env_vars.items() %}
+- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
+{% endfor %}
+{% endif %}
+
+{% if default_models %}
+### Models
+
+The following models are available by default:
+
+{% for model in default_models %}
+- `{{ model.model_id }} ({{ model.provider_model_id }})`
+{% endfor %}
+{% endif %}
+
+
+### Prerequisite: API Keys
+
+Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
+
+
+## Running Llama Stack with NVIDIA
+
+You can do this either via Docker, which has a pre-built image, or via Conda, where you build the distribution code yourself.
+
+### Via Docker
+
+This method allows you to get started quickly without having to build the distribution code.
+
+```bash
+LLAMA_STACK_PORT=5001
+docker run \
+  -it \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ./run.yaml:/root/my-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --yaml-config /root/my-run.yaml \
+  --port $LLAMA_STACK_PORT \
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+```
+
+### Via Conda
+
+```bash
+llama stack build --template nvidia --image-type conda
+llama stack run ./run.yaml \
+  --port 5001 \
+  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+```
diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py
new file mode 100644
index 000000000..0f1551180
--- /dev/null
+++ b/llama_stack/templates/nvidia/nvidia.py
@@ -0,0 +1,64 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from pathlib import Path
+
+from llama_models.sku_list import all_registered_models
+
+from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
+from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
+from llama_stack.providers.remote.inference.nvidia._nvidia import _MODEL_ALIASES
+
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+
+
+def get_distribution_template() -> DistributionTemplate:
+    providers = {
+        "inference": ["remote::nvidia"],
+        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
+        "safety": ["inline::llama-guard"],
+        "agents": ["inline::meta-reference"],
+        "telemetry": ["inline::meta-reference"],
+    }
+
+    inference_provider = Provider(
+        provider_id="nvidia",
+        provider_type="remote::nvidia",
+        config=NVIDIAConfig.sample_run_config(),
+    )
+
+    inference_model = ModelInput(
+        model_id="${env.INFERENCE_MODEL}",
+        provider_id="nvidia",
+    )
+
+    return DistributionTemplate(
+        name="nvidia",
+        distro_type="remote_hosted",
+        description="Use NVIDIA NIM for running LLM inference",
+        docker_image=None,
+        template_path=Path(__file__).parent / "doc_template.md",
+        providers=providers,
+        default_models=[inference_model],
+        run_configs={
+            "run.yaml": RunConfigSettings(
+                provider_overrides={
+                    "inference": [inference_provider],
+                },
+                default_models=[inference_model],
+            ),
+        },
+        run_config_env_vars={
+            "LLAMASTACK_PORT": (
+                "5001",
+                "Port for the Llama Stack distribution server",
+            ),
+            "NVIDIA_API_KEY": (
+                "",
+                "NVIDIA API Key",
+            ),
+        },
+    )
diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml
new file mode 100644
index 000000000..f4953852f
--- /dev/null
+++ b/llama_stack/templates/nvidia/run.yaml
@@ -0,0 +1,55 @@
+version: '2'
+image_name: nvidia
+docker_image: null
+conda_env: nvidia
+apis:
+- agents
+- inference
+- memory
+- safety
+- telemetry
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: https://integrate.api.nvidia.com
+      api_key: ${env.NVIDIA_API_KEY}
+  memory:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config: {}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence_store:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
+  telemetry:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config: {}
+metadata_store:
+  namespace: null
+  type: sqlite
+  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
+models:
+- metadata: {}
+  model_id: ${env.INFERENCE_MODEL}
+  provider_id: nvidia
+  provider_model_id: null
+shields: []
+memory_banks: []
+datasets: []
+scoring_fns: []
+eval_tasks: []
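
Once the patch is applied and the distribution is up (via either the Docker or Conda commands documented above), a quick way to exercise the new `remote::nvidia` inference provider is to send a chat completion through the Llama Stack Python client. The snippet below is a minimal sketch rather than part of the patch: it assumes the `llama-stack-client` package is installed, the server is listening on `localhost:5001`, and `INFERENCE_MODEL` names a model served via `integrate.api.nvidia.com`; exact method and field names may differ slightly between client versions.

```python
# Minimal smoke test for the NVIDIA distribution (assumes the server was started
# with `llama stack run` on port 5001 and NVIDIA_API_KEY/INFERENCE_MODEL are set).
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")

# Use the same model that run.yaml registers via ${env.INFERENCE_MODEL}.
response = client.inference.chat_completion(
    model_id=os.environ["INFERENCE_MODEL"],
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.completion_message.content)
```

If the call returns a completion, the `remote::nvidia` provider and the API-key wiring in `run.yaml` are working end to end.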