Merge branch 'main' into allow-dynamic-models-nvidia

2025-12-24 02:48:04 +00:00 · 2025-07-14 19:01:28 -04:00 · 2025-07-14 19:01:28 -04:00 · c2ab8988e6
commit c2ab8988e6
parent f4af72d98e 33f0d83ad3
127 changed files with 3997 additions and 3394 deletions
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -11,6 +11,9 @@ from llama_stack.providers.utils.inference.model_registry import (
    build_hf_repo_model_entry,
 )

+SAFETY_MODELS_ENTRIES = []
+
+# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html
 MODEL_ENTRIES = [
    build_hf_repo_model_entry(
        "meta/llama3-8b-instruct",
@@ -99,4 +102,4 @@ MODEL_ENTRIES = [
    ),
    # TODO(mf): how do we handle Nemotron models?
    # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct",
-]
+] + SAFETY_MODELS_ENTRIES