From 5f2f83865696ff895f728f25e8c2800242156dfe Mon Sep 17 00:00:00 2001
From: Jash Gulabrai
Date: Tue, 15 Apr 2025 12:56:55 -0400
Subject: [PATCH] fix: ensure run_eval accepts a model alias and converts it to the NVIDIA model ID

---
 llama_stack/providers/remote/eval/nvidia/eval.py | 9 ++++++++-
 tests/unit/providers/nvidia/test_eval.py         | 5 +++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/llama_stack/providers/remote/eval/nvidia/eval.py
index ca8464f51..5c351c8ca 100644
--- a/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -7,6 +7,7 @@ from typing import Any, Dict, List
 
 import requests
 
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.datasetio import DatasetIO
@@ -14,6 +15,7 @@ from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import Scoring, ScoringResult
 from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .....apis.common.job_types import Job, JobStatus
 from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
@@ -25,6 +27,7 @@ DEFAULT_NAMESPACE = "nvidia"
 class NVIDIAEvalImpl(
     Eval,
     BenchmarksProtocolPrivate,
+    ModelRegistryHelper,
 ):
     def __init__(
         self,
@@ -42,6 +45,8 @@ class NVIDIAEvalImpl(
         self.inference_api = inference_api
         self.agents_api = agents_api
 
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
+
     async def initialize(self) -> None: ...
 
     async def shutdown(self) -> None: ...
@@ -81,11 +86,13 @@ class NVIDIAEvalImpl(
             if benchmark_config.eval_candidate.type == "model"
             else benchmark_config.eval_candidate.config.model
         )
+        nvidia_model = self.get_provider_model_id(model)
+
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
                 "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
-                "target": {"type": "model", "model": model},
+                "target": {"type": "model", "model": nvidia_model},
             },
         )
 
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index 8e09820b5..68f102f83 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -13,6 +13,7 @@ import pytest
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.common.job_types import Job, JobStatus
 from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
+from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
 
@@ -121,7 +122,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         benchmark_config = BenchmarkConfig(
             eval_candidate=ModelCandidate(
                 type="model",
-                model="meta/llama-3.1-8b-instruct",
+                model=CoreModelId.llama3_1_8b_instruct.value,
                 sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
             )
         )
@@ -140,7 +141,7 @@
         self._assert_request_body(
             {
                 "config": f"nvidia/{MOCK_BENCHMARK_ID}",
-                "target": {"type": "model", "model": benchmark_config.eval_candidate.model},
+                "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
             }
         )
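
Note on the alias-to-ID translation (a minimal sketch, not part of the patch):
ModelRegistryHelper builds a lookup from each registered model alias to its
provider-specific model ID, which is why mixing it into NVIDIAEvalImpl lets
run_eval accept either form and still post an NVIDIA-namespaced model ID to
the evaluator service. The snippet below assumes MODEL_ENTRIES maps the
Llama 3.1 8B Instruct alias to "meta/llama-3.1-8b-instruct"; the exact
entries are not shown in this diff.

    # Hypothetical usage sketch, run outside the provider for illustration.
    from llama_stack.models.llama.sku_types import CoreModelId
    from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
    from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

    helper = ModelRegistryHelper(model_entries=MODEL_ENTRIES)

    # An alias such as "Llama3.1-8B-Instruct" resolves to the provider model
    # ID that ends up in the evaluation job's "target" field.
    alias = CoreModelId.llama3_1_8b_instruct.value
    print(helper.get_provider_model_id(alias))  # expected: meta/llama-3.1-8b-instruct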