From 4205376653f9f1f22ec2e7bd87518bb753bc141b Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 17 Apr 2025 09:50:40 -0400 Subject: [PATCH] chore: add meta/llama-3.3-70b-instruct as supported nvidia inference provider model (#1985) see https://build.nvidia.com/meta/llama-3_3-70b-instruct --- docs/source/distributions/self_hosted_distro/nvidia.md | 1 + .../providers/remote/inference/nvidia/models.py | 4 ++++ llama_stack/templates/nvidia/run.yaml | 10 ++++++++++ 3 files changed, 15 insertions(+) diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md index 563fdf4e5..539d18d92 100644 --- a/docs/source/distributions/self_hosted_distro/nvidia.md +++ b/docs/source/distributions/self_hosted_distro/nvidia.md @@ -45,6 +45,7 @@ The following models are available by default: - `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` - `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` - `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta/llama-3.3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` - `nvidia/llama-3.2-nv-embedqa-1b-v2 ` - `nvidia/nv-embedqa-e5-v5 ` - `nvidia/nv-embedqa-mistral-7b-v2 ` diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index 964125148..127a6ca59 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -48,6 +48,10 @@ MODEL_ENTRIES = [ "meta/llama-3.2-90b-vision-instruct", CoreModelId.llama3_2_90b_vision_instruct.value, ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), # NeMo Retriever Text Embedding models - # # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index 1267a9883..ff548d82e 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -173,6 +173,16 @@ models: provider_id: nvidia provider_model_id: meta/llama-3.2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.3-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: nvidia + provider_model_id: meta/llama-3.3-70b-instruct + model_type: llm - metadata: embedding_dimension: 2048 context_length: 8192