Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-21 03:59:42 +00:00
feat: allow dynamic model registration for nvidia inference provider (#2726)
# What does this PR do?

Lets users register models available at https://integrate.api.nvidia.com/v1/models that aren't already listed in llama_stack/providers/remote/inference/nvidia/models.py.

## Test Plan

1. Run the nvidia distro.
2. Register a model from https://integrate.api.nvidia.com/v1/models that isn't already known; as of this writing, nvidia/llama-3.1-nemotron-ultra-253b-v1 is a good example (see the sketch after this list).
3. Perform inference with the model.
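The following is a minimal, hypothetical sketch of Test Plan steps 2 and 3 using the Python llama-stack client. The base URL, the exact `models.register()` and `chat_completion()` signatures, and the `nvidia` provider id are assumptions based on the public client API, not taken from this PR.

```python
# Hypothetical sketch, not part of this PR: register a model the nvidia
# provider doesn't ship in models.py, then run inference against it.
from llama_stack_client import LlamaStackClient

# Assumed: the nvidia distro is running locally on the default llama-stack port.
client = LlamaStackClient(base_url="http://localhost:8321")

# Step 2: dynamically register a model that is not in the static model list.
client.models.register(
    model_id="nvidia/llama-3.1-nemotron-ultra-253b-v1",
    provider_id="nvidia",
    model_type="llm",
)

# Step 3: perform inference with the newly registered model.
response = client.inference.chat_completion(
    model_id="nvidia/llama-3.1-nemotron-ultra-253b-v1",
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.completion_message.content)
```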
parent 57745101be
commit 477bcd4d09

2 changed files with 23 additions and 48 deletions
```diff
@@ -7,7 +7,7 @@
 import os
 import unittest
 import warnings
-from unittest.mock import patch
+from unittest.mock import AsyncMock, patch
 
 import pytest
 
```
```diff
@@ -343,7 +343,11 @@ class TestNvidiaPostTraining(unittest.TestCase):
             provider_resource_id=model_id,
             model_type=model_type,
         )
-        result = self.run_async(self.inference_adapter.register_model(model))
+
+        # simulate a NIM where default/job-1234 is an available model
+        with patch.object(self.inference_adapter, "check_model_availability", new_callable=AsyncMock) as mock_check:
+            mock_check.return_value = True
+            result = self.run_async(self.inference_adapter.register_model(model))
         assert result == model
         assert len(self.inference_adapter.alias_to_provider_id_map) > 1
         assert self.inference_adapter.get_provider_model_id(model.provider_model_id) == model_id
```
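The `patch.object(..., new_callable=AsyncMock)` pattern in the hunk above is standard `unittest.mock`. The snippet below is a self-contained illustration of it; `Adapter` here is a hypothetical stand-in for the nvidia inference adapter, not the real class.

```python
# Standalone illustration of the AsyncMock pattern used in the test above:
# replace an async method so the test can force check_model_availability to
# report the model as available without hitting a live NIM endpoint.
import asyncio
from unittest.mock import AsyncMock, patch


class Adapter:
    async def check_model_availability(self, model_id: str) -> bool:
        raise RuntimeError("would hit the live NIM endpoint")


adapter = Adapter()
with patch.object(adapter, "check_model_availability", new_callable=AsyncMock) as mock_check:
    mock_check.return_value = True  # simulate a NIM that hosts the model
    assert asyncio.run(adapter.check_model_availability("default/job-1234")) is True
    mock_check.assert_awaited_once_with("default/job-1234")
```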