fix: ensure run_eval accepts model alias and converts to nvidia model ID

Jash Gulabrai 2025-04-15 12:56:55 -04:00
parent 95619892ea
commit 5f2f838656
2 changed files with 11 additions and 3 deletions
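The core of the change: run_eval no longer forwards the caller's model string verbatim to the evaluator. NVIDIAEvalImpl now mixes in ModelRegistryHelper (initialized with the NVIDIA inference provider's MODEL_ENTRIES) and resolves the eval candidate's model through get_provider_model_id, so a Llama model alias is translated to the NVIDIA-namespaced model ID before the job is submitted. A minimal sketch of that lookup outside the provider (the expected output is an assumption based on the test below):

# Sketch only, not provider code: resolve a model alias to the NVIDIA model ID.
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

helper = ModelRegistryHelper(model_entries=MODEL_ENTRIES)

# A CoreModelId alias resolves to the provider-namespaced ID the evaluator expects.
alias = CoreModelId.llama3_1_8b_instruct.value
print(helper.get_provider_model_id(alias))  # expected: "meta/llama-3.1-8b-instruct"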

View file

@@ -7,6 +7,7 @@ from typing import Any, Dict, List
import requests
from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
from llama_stack.apis.agents import Agents
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.datasetio import DatasetIO
@@ -14,6 +15,7 @@ from llama_stack.apis.datasets import Datasets
from llama_stack.apis.inference import Inference
from llama_stack.apis.scoring import Scoring, ScoringResult
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
from .....apis.common.job_types import Job, JobStatus
from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
@@ -25,6 +27,7 @@ DEFAULT_NAMESPACE = "nvidia"
class NVIDIAEvalImpl(
Eval,
BenchmarksProtocolPrivate,
ModelRegistryHelper,
):
def __init__(
self,
@@ -42,6 +45,8 @@ class NVIDIAEvalImpl(
self.inference_api = inference_api
self.agents_api = agents_api
ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
async def initialize(self) -> None: ...
async def shutdown(self) -> None: ...
@@ -81,11 +86,13 @@ class NVIDIAEvalImpl(
if benchmark_config.eval_candidate.type == "model"
else benchmark_config.eval_candidate.config.model
)
nvidia_model = self.get_provider_model_id(model)
result = await self._evaluator_post(
"/v1/evaluation/jobs",
{
"config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
"target": {"type": "model", "model": model},
"target": {"type": "model", "model": nvidia_model},
},
)
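With the conversion in place, the body posted to /v1/evaluation/jobs carries the resolved NVIDIA model ID rather than the caller's alias. Roughly (the benchmark name is illustrative; only the structure matters):

# Illustrative request body; "my-benchmark" stands in for the registered benchmark ID.
{
    "config": "nvidia/my-benchmark",
    "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
}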

View file

@@ -13,6 +13,7 @@ import pytest
from llama_stack.apis.benchmarks import Benchmark
from llama_stack.apis.common.job_types import Job, JobStatus
from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
@@ -121,7 +122,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
benchmark_config = BenchmarkConfig(
eval_candidate=ModelCandidate(
type="model",
model="meta/llama-3.1-8b-instruct",
model=CoreModelId.llama3_1_8b_instruct.value,
sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
)
)
@@ -140,7 +141,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
self._assert_request_body(
{
"config": f"nvidia/{MOCK_BENCHMARK_ID}",
"target": {"type": "model", "model": benchmark_config.eval_candidate.model},
"target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
}
)