Add nvidia remote distro

Chantal D Gama Rose 2024-11-19 21:02:20 +00:00
parent 18e8f18749
commit a5d413045c
7 changed files with 273 additions and 1 deletion

View file

@ -0,0 +1,60 @@
# NVIDIA Distribution
The `llamastack/distribution-nvidia` distribution consists of the following provider configurations.
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| inference | `remote::nvidia` |
| memory | `inline::faiss`, `remote::chromadb`, `remote::pgvector` |
| safety | `inline::llama-guard` |
| telemetry | `inline::meta-reference` |
### Environment Variables
The following environment variables can be configured:
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
### Models
The following models are available by default:
- `${env.INFERENCE_MODEL} (None)`
### Prerequisite: API Keys
Make sure you have access to an NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
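For example, export the key in your shell so the `--env NVIDIA_API_KEY=$NVIDIA_API_KEY` flags in the commands below resolve to your key (the value shown is a placeholder):
```bash
# Placeholder value for illustration; substitute your key from https://build.nvidia.com/
export NVIDIA_API_KEY=nvapi-your-key-here
```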
## Running Llama Stack with NVIDIA
You can do this via Conda (build the code) or via Docker, which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-nvidia \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
```
### Via Conda
```bash
llama stack build --template nvidia --image-type conda
llama stack run ./run.yaml \
  --port 5001 \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
```
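Once the server is running, a quick sanity check is to list the registered models from a client. A minimal sketch, assuming the `llama-stack-client` Python package is installed and the server is listening on port 5001:
```python
from llama_stack_client import LlamaStackClient

# Point the client at the locally running distribution server
client = LlamaStackClient(base_url="http://localhost:5001")

# The list should include the model supplied via ${env.INFERENCE_MODEL}
for model in client.models.list():
    print(model)
```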

View file

@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 import os
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from llama_models.schema_utils import json_schema_type
 from pydantic import BaseModel, Field
@ -50,3 +50,10 @@ class NVIDIAConfig(BaseModel):
     @property
     def is_hosted(self) -> bool:
         return "integrate.api.nvidia.com" in self.base_url
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        return {
+            "url": "https://integrate.api.nvidia.com",
+            "api_key": "${env.NVIDIA_API_KEY}",
+        }
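As an illustration of how this hook gets used, the distribution template code later in this diff passes `NVIDIAConfig.sample_run_config()` as the inference provider config, which is what ends up under `providers.inference.config` in the generated `run.yaml`:
```python
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig

# The ${env.NVIDIA_API_KEY} placeholder is resolved at server startup,
# not here; this simply shows the dict written into run.yaml.
print(NVIDIAConfig.sample_run_config())
# {'url': 'https://integrate.api.nvidia.com', 'api_key': '${env.NVIDIA_API_KEY}'}
```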

View file

@ -0,0 +1,7 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .nvidia import get_distribution_template # noqa: F401

View file

@ -0,0 +1,19 @@
version: '2'
name: nvidia
distribution_spec:
  description: Use NVIDIA NIM for running LLM inference
  docker_image: null
  providers:
    inference:
    - remote::nvidia
    memory:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
image_type: conda
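This build spec backs the Conda instructions above; with the template in place, the environment can be built with the standard build command (a sketch, assuming the `llama` CLI from llama-stack is installed):
```bash
# Builds a conda environment for the "nvidia" template described by this build.yaml
llama stack build --template nvidia --image-type conda
```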

View file

@ -0,0 +1,60 @@
# NVIDIA Distribution
The `llamastack/distribution-{{ name }}` distribution consists of the following provider configurations.
{{ providers_table }}
{% if run_config_env_vars %}
### Environment Variables
The following environment variables can be configured:
{% for var, (default_value, description) in run_config_env_vars.items() %}
- `{{ var }}`: {{ description }} (default: `{{ default_value }}`)
{% endfor %}
{% endif %}
{% if default_models %}
### Models
The following models are available by default:
{% for model in default_models %}
- `{{ model.model_id }} ({{ model.provider_model_id }})`
{% endfor %}
{% endif %}
### Prerequisite: API Keys
Make sure you have access to an NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
## Running Llama Stack with NVIDIA
You can do this via Conda (build the code) or via Docker, which has a pre-built image.
### Via Docker
This method allows you to get started quickly without having to build the distribution code.
```bash
LLAMA_STACK_PORT=5001
docker run \
  -it \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-{{ name }} \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
```
### Via Conda
```bash
llama stack build --template nvidia --image-type conda
llama stack run ./run.yaml \
  --port 5001 \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
```

View file

@ -0,0 +1,64 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from pathlib import Path
from llama_models.sku_list import all_registered_models
from llama_stack.distribution.datatypes import ModelInput, Provider, ShieldInput
from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia._nvidia import _MODEL_ALIASES
from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
def get_distribution_template() -> DistributionTemplate:
    providers = {
        "inference": ["remote::nvidia"],
        "memory": ["inline::faiss", "remote::chromadb", "remote::pgvector"],
        "safety": ["inline::llama-guard"],
        "agents": ["inline::meta-reference"],
        "telemetry": ["inline::meta-reference"],
    }

    inference_provider = Provider(
        provider_id="nvidia",
        provider_type="remote::nvidia",
        config=NVIDIAConfig.sample_run_config(),
    )

    inference_model = ModelInput(
        model_id="${env.INFERENCE_MODEL}",
        provider_id="nvidia",
    )

    return DistributionTemplate(
        name="nvidia",
        distro_type="remote_hosted",
        description="Use NVIDIA NIM for running LLM inference",
        docker_image=None,
        template_path=Path(__file__).parent / "doc_template.md",
        providers=providers,
        default_models=[inference_model],
        run_configs={
            "run.yaml": RunConfigSettings(
                provider_overrides={
                    "inference": [inference_provider],
                },
                default_models=[inference_model],
            ),
        },
        run_config_env_vars={
            "LLAMASTACK_PORT": (
                "5001",
                "Port for the Llama Stack distribution server",
            ),
            "NVIDIA_API_KEY": (
                "",
                "NVIDIA API Key",
            ),
        },
    )
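As a quick check of the wiring above, the template can be imported and inspected. A minimal sketch, assuming the package path `llama_stack.templates.nvidia` (per the `__init__.py` earlier in this diff) and that `DistributionTemplate` exposes its constructor arguments as attributes:
```python
from llama_stack.templates.nvidia import get_distribution_template

template = get_distribution_template()

# Both values come straight from the constructor call above
print(template.name)                         # "nvidia"
print(sorted(template.run_config_env_vars))  # ['LLAMASTACK_PORT', 'NVIDIA_API_KEY']
```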

View file

@ -0,0 +1,55 @@
version: '2'
image_name: nvidia
docker_image: null
conda_env: nvidia
apis:
- agents
- inference
- memory
- safety
- telemetry
providers:
  inference:
  - provider_id: nvidia
    provider_type: remote::nvidia
    config:
      url: https://integrate.api.nvidia.com
      api_key: ${env.NVIDIA_API_KEY}
  memory:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      kvstore:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/faiss_store.db
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config: {}
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence_store:
        type: sqlite
        namespace: null
        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/agents_store.db
  telemetry:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config: {}
metadata_store:
  namespace: null
  type: sqlite
  db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: nvidia
  provider_model_id: null
shields: []
memory_banks: []
datasets: []
scoring_fns: []
eval_tasks: []
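Note that this run config references `${env.NVIDIA_API_KEY}` and `${env.INFERENCE_MODEL}` without defaults, so both must be supplied when starting the stack. For example (the model ID is a placeholder; use one served by your NIM endpoint):
```bash
llama stack run ./run.yaml \
  --port 5001 \
  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
  --env INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
```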