fix: ensure run_eval accepts model alias and converts to nvidia model ID

Jash Gulabrai 2025-04-15 12:56:55 -04:00
parent 95619892ea
commit 5f2f838656
2 changed files with 11 additions and 3 deletions
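
The change in brief: NVIDIAEvalImpl now mixes in ModelRegistryHelper, seeded with the NVIDIA MODEL_ENTRIES, so run_eval can translate a Llama model alias into the provider-specific model ID before it submits the evaluation job. A minimal sketch of the lookup idea, assuming a plain dict-backed registry (the real ModelRegistryHelper and MODEL_ENTRIES are richer than this, and the alias string is illustrative):

# Sketch only: a dict-backed alias -> provider model ID lookup.
MODEL_ALIASES = {
    "Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct",  # assumed entry, for illustration
}

def get_provider_model_id(alias: str) -> str:
    # Pass unrecognized values through unchanged so callers that already
    # supply provider IDs keep working (a sketch-level choice, not a claim
    # about the real helper).
    return MODEL_ALIASES.get(alias, alias)

print(get_provider_model_id("Llama3.1-8B-Instruct"))  # meta/llama-3.1-8b-instruct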

llama_stack/providers/remote/eval/nvidia/eval.py

@@ -7,6 +7,7 @@ from typing import Any, Dict, List
 import requests
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.datasetio import DatasetIO
@@ -14,6 +15,7 @@ from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import Scoring, ScoringResult
 from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from .....apis.common.job_types import Job, JobStatus
 from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
@@ -25,6 +27,7 @@ DEFAULT_NAMESPACE = "nvidia"
 class NVIDIAEvalImpl(
     Eval,
     BenchmarksProtocolPrivate,
+    ModelRegistryHelper,
 ):
     def __init__(
         self,
@@ -42,6 +45,8 @@ class NVIDIAEvalImpl(
         self.inference_api = inference_api
         self.agents_api = agents_api
+
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)

     async def initialize(self) -> None: ...

     async def shutdown(self) -> None: ...
@@ -81,11 +86,13 @@ class NVIDIAEvalImpl(
         model = (
             benchmark_config.eval_candidate.model
             if benchmark_config.eval_candidate.type == "model"
             else benchmark_config.eval_candidate.config.model
         )
+        nvidia_model = self.get_provider_model_id(model)
+
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
                 "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
-                "target": {"type": "model", "model": model},
+                "target": {"type": "model", "model": nvidia_model},
             },
         )
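
With the conversion in place, the payload posted to /v1/evaluation/jobs carries the NVIDIA model ID rather than the raw alias. A hypothetical before/after of the request body (benchmark name and model strings are illustrative, not taken from a real job):

# Hypothetical request bodies for POST /v1/evaluation/jobs.
payload_before = {
    "config": "nvidia/mock-benchmark",
    "target": {"type": "model", "model": "Llama3.1-8B-Instruct"},  # raw alias leaked through
}
payload_after = {
    "config": "nvidia/mock-benchmark",
    "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},  # provider model ID
}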

tests/unit/providers/nvidia/test_eval.py

@@ -13,6 +13,7 @@ import pytest
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.common.job_types import Job, JobStatus
 from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
+from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
@@ -121,7 +122,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         benchmark_config = BenchmarkConfig(
             eval_candidate=ModelCandidate(
                 type="model",
-                model="meta/llama-3.1-8b-instruct",
+                model=CoreModelId.llama3_1_8b_instruct.value,
                 sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
             )
         )
@@ -140,7 +141,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         self._assert_request_body(
             {
                 "config": f"nvidia/{MOCK_BENCHMARK_ID}",
-                "target": {"type": "model", "model": benchmark_config.eval_candidate.model},
+                "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
             }
         )
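
The updated test pins down the round trip: the candidate is built from CoreModelId.llama3_1_8b_instruct.value, and the request body is asserted to contain the NVIDIA-namespaced ID. A standalone sketch of the same check, reusing the dict-backed lookup from the earlier example rather than the real ModelRegistryHelper:

import unittest

MODEL_ALIASES = {
    "Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct",  # assumed entry, for illustration
}

def get_provider_model_id(alias: str) -> str:
    return MODEL_ALIASES.get(alias, alias)

class TestAliasConversion(unittest.TestCase):
    def test_alias_maps_to_nvidia_model_id(self):
        # The alias should resolve to the provider-namespaced model ID.
        self.assertEqual(
            get_provider_model_id("Llama3.1-8B-Instruct"),
            "meta/llama-3.1-8b-instruct",
        )

if __name__ == "__main__":
    unittest.main()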