Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 01:03:59 +00:00)
fix: ensure run_eval accepts model alias and converts to nvidia model ID
This commit is contained in:
Parent: 95619892ea
Commit: 5f2f838656
2 changed files with 11 additions and 3 deletions
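In short: the provider mixes ModelRegistryHelper into NVIDIAEvalImpl, registers the NVIDIA MODEL_ENTRIES at construction time, and resolves the eval candidate's model (which may be a CoreModelId alias) to its namespaced NVIDIA model ID before posting the evaluation job; the unit test now passes an alias and asserts that the request body carries the resolved "meta/llama-3.1-8b-instruct" ID.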
llama_stack/providers/remote/eval/nvidia/eval.py:

@@ -7,6 +7,7 @@ from typing import Any, Dict, List
 
 import requests
 
+from build.lib.llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.datasetio import DatasetIO
@@ -14,6 +15,7 @@ from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import Scoring, ScoringResult
 from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .....apis.common.job_types import Job, JobStatus
 from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
@@ -25,6 +27,7 @@ DEFAULT_NAMESPACE = "nvidia"
 class NVIDIAEvalImpl(
     Eval,
     BenchmarksProtocolPrivate,
+    ModelRegistryHelper,
 ):
     def __init__(
         self,
@@ -42,6 +45,8 @@ class NVIDIAEvalImpl(
         self.inference_api = inference_api
         self.agents_api = agents_api
 
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
+
     async def initialize(self) -> None: ...
 
     async def shutdown(self) -> None: ...
@@ -81,11 +86,13 @@ class NVIDIAEvalImpl(
             if benchmark_config.eval_candidate.type == "model"
             else benchmark_config.eval_candidate.config.model
         )
+        nvidia_model = self.get_provider_model_id(model)
+
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
                 "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
-                "target": {"type": "model", "model": model},
+                "target": {"type": "model", "model": nvidia_model},
             },
         )
 
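The resolution step relies on ModelRegistryHelper, which builds a lookup from model aliases to provider model IDs out of MODEL_ENTRIES. A minimal sketch of that lookup under simplified assumptions (a plain dict instead of llama-stack's actual entry types; the alias string and the mapping contents are illustrative, not taken from the real MODEL_ENTRIES):

# Sketch of the alias -> provider-ID resolution that
# ModelRegistryHelper.get_provider_model_id performs for this provider.
# The dict below is illustrative; the real entries come from MODEL_ENTRIES
# in the NVIDIA inference provider.
ALIAS_TO_PROVIDER_ID = {
    "Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct",  # CoreModelId-style alias (assumed)
    "meta/llama-3.1-8b-instruct": "meta/llama-3.1-8b-instruct",  # namespaced IDs map to themselves
}

def get_provider_model_id(model: str) -> str | None:
    """Resolve a model alias to the namespaced NVIDIA model ID, if registered."""
    return ALIAS_TO_PROVIDER_ID.get(model)

assert get_provider_model_id("Llama3.1-8B-Instruct") == "meta/llama-3.1-8b-instruct"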
tests/unit/providers/nvidia/test_eval.py:

@@ -13,6 +13,7 @@ import pytest
 
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.common.job_types import Job, JobStatus
 from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
+from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
@@ -121,7 +122,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         benchmark_config = BenchmarkConfig(
             eval_candidate=ModelCandidate(
                 type="model",
-                model="meta/llama-3.1-8b-instruct",
+                model=CoreModelId.llama3_1_8b_instruct.value,
                 sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
             )
         )
@@ -140,7 +141,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         self._assert_request_body(
             {
                 "config": f"nvidia/{MOCK_BENCHMARK_ID}",
-                "target": {"type": "model", "model": benchmark_config.eval_candidate.model},
+                "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
             }
         )
 
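Putting the two files together, callers can now pass either the alias or the namespaced ID to run_eval. A hedged usage sketch follows: eval_impl and "my-benchmark" are illustrative names, and the payload shape is taken from the diff above.

# Hypothetical call: the candidate model is given as a CoreModelId alias,
# and run_eval resolves it before posting to /v1/evaluation/jobs.
benchmark_config = BenchmarkConfig(
    eval_candidate=ModelCandidate(
        type="model",
        model=CoreModelId.llama3_1_8b_instruct.value,  # alias, not "meta/..."
        sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
    )
)
job = await eval_impl.run_eval("my-benchmark", benchmark_config)
# Request body sent by the provider (per the diff above):
# {"config": "nvidia/my-benchmark",
#  "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"}}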