feat: allow dynamic model registration for nvidia inference provider (#2726)

# What does this PR do?

Lets users register models available at
https://integrate.api.nvidia.com/v1/models that aren't already listed in
llama_stack/providers/remote/inference/nvidia/models.py.
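
Per the test change below, the mechanism is a `check_model_availability` call on the inference adapter. A minimal sketch of how such a check could query the endpoint, assuming the OpenAI-style list response the NVIDIA API returns (the helper name and client setup here are illustrative, not the adapter's exact code):

```python
import httpx


async def check_model_availability(model_id: str, api_key: str) -> bool:
    """Return True if `model_id` is listed by https://integrate.api.nvidia.com/v1/models."""
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            "https://integrate.api.nvidia.com/v1/models",
            headers={"Authorization": f"Bearer {api_key}"},
        )
        resp.raise_for_status()
        # OpenAI-style payload: {"object": "list", "data": [{"id": "..."}, ...]}
        return any(m["id"] == model_id for m in resp.json().get("data", []))
```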

## Test Plan

1. run the nvidia distro
2. register a model from https://integrate.api.nvidia.com/v1/models that
isn't already known; as of this writing,
nvidia/llama-3.1-nemotron-ultra-253b-v1 is a good example
3. perform inference with the model (see the client sketch after this list)
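
A sketch of steps 2 and 3 using the llama-stack-client Python SDK, assuming the default local stack address (the base URL, port, and model choice are the distro defaults as of this writing; adjust as needed):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Step 2: register a model the NIM serves but models.py does not list.
client.models.register(
    model_id="nvidia/llama-3.1-nemotron-ultra-253b-v1",
    provider_id="nvidia",
)

# Step 3: perform inference with the newly registered model.
response = client.inference.chat_completion(
    model_id="nvidia/llama-3.1-nemotron-ultra-253b-v1",
    messages=[{"role": "user", "content": "Hello"}],
)
```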
Author: Matthew Farrellee
Date: 2025-07-17 15:11:30 -04:00
Commit: 477bcd4d09 (parent: 57745101be)
2 changed files with 23 additions and 48 deletions


@@ -7,7 +7,7 @@
 import os
 import unittest
 import warnings
-from unittest.mock import patch
+from unittest.mock import AsyncMock, patch

 import pytest
@@ -343,7 +343,11 @@ class TestNvidiaPostTraining(unittest.TestCase):
             provider_resource_id=model_id,
             model_type=model_type,
         )
-        result = self.run_async(self.inference_adapter.register_model(model))
+        # simulate a NIM where default/job-1234 is an available model
+        with patch.object(self.inference_adapter, "check_model_availability", new_callable=AsyncMock) as mock_check:
+            mock_check.return_value = True
+            result = self.run_async(self.inference_adapter.register_model(model))
         assert result == model
         assert len(self.inference_adapter.alias_to_provider_id_map) > 1
         assert self.inference_adapter.get_provider_model_id(model.provider_model_id) == model_id
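
The mock replaces the adapter's `check_model_availability` so the test never hits a live NIM. A runnable toy version of the fallback being exercised (only `check_model_availability` and `alias_to_provider_id_map` come from the diff above; the class, the hard-coded models, and the control flow are assumptions, not the adapter's exact implementation):

```python
import asyncio


class FakeAdapter:
    def __init__(self):
        # statically known models, standing in for models.py
        self.alias_to_provider_id_map = {
            "meta/llama-3.1-8b-instruct": "meta/llama-3.1-8b-instruct",
        }

    async def check_model_availability(self, provider_model_id: str) -> bool:
        # stand-in for the live query against /v1/models
        return provider_model_id == "default/job-1234"

    async def register_model(self, model_id: str) -> str:
        # unknown models fall back to the live availability check
        # instead of being rejected outright
        if model_id not in self.alias_to_provider_id_map:
            if not await self.check_model_availability(model_id):
                raise ValueError(f"model {model_id} is not available")
            self.alias_to_provider_id_map[model_id] = model_id
        return model_id


print(asyncio.run(FakeAdapter().register_model("default/job-1234")))  # default/job-1234
```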