Mirror of https://github.com/meta-llama/llama-stack.git
fix: ensure run_eval accepts model alias and converts to nvidia model ID
parent 95619892ea
commit 5f2f838656
2 changed files with 11 additions and 3 deletions
llama_stack/providers/remote/eval/nvidia/eval.py

@@ -7,6 +7,7 @@ from typing import Any, Dict, List
 import requests
+from build.lib.llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.datasetio import DatasetIO
@@ -14,6 +15,7 @@ from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import Scoring, ScoringResult
 from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from .....apis.common.job_types import Job, JobStatus
 from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
@@ -25,6 +27,7 @@ DEFAULT_NAMESPACE = "nvidia"
 class NVIDIAEvalImpl(
     Eval,
     BenchmarksProtocolPrivate,
+    ModelRegistryHelper,
 ):
     def __init__(
         self,
@@ -42,6 +45,8 @@ class NVIDIAEvalImpl(
         self.inference_api = inference_api
         self.agents_api = agents_api
 
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)
+
     async def initialize(self) -> None: ...
 
     async def shutdown(self) -> None: ...
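
The ModelRegistryHelper mixin added above is what supplies the get_provider_model_id() call used in the next hunk. Note that the new MODEL_ENTRIES import resolves through a build.lib. prefix, which looks like a stray reference to a local build artifact; the canonical module path would presumably be llama_stack.providers.remote.inference.nvidia.models. As a rough illustration of the lookup the mixin performs, here is a minimal, self-contained sketch; the entry format and fallback behavior are assumptions, not the real ModelRegistryHelper implementation:

# Minimal sketch of an alias -> provider-model-ID registry (illustrative only;
# the real helper lives in llama_stack.providers.utils.inference.model_registry).

class AliasRegistry:
    def __init__(self, model_entries: dict[str, str]) -> None:
        # Maps canonical Llama model aliases to NVIDIA provider model IDs.
        self._alias_to_provider_id = dict(model_entries)

    def get_provider_model_id(self, model: str) -> str:
        # Fall back to the input when no mapping exists (assumed behavior).
        return self._alias_to_provider_id.get(model, model)


registry = AliasRegistry({"Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct"})
assert registry.get_provider_model_id("Llama3.1-8B-Instruct") == "meta/llama-3.1-8b-instruct"
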
@@ -81,11 +86,13 @@ class NVIDIAEvalImpl(
             if benchmark_config.eval_candidate.type == "model"
             else benchmark_config.eval_candidate.config.model
         )
+        nvidia_model = self.get_provider_model_id(model)
+
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
                 "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
-                "target": {"type": "model", "model": model},
+                "target": {"type": "model", "model": nvidia_model},
             },
         )
 
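
This is the substance of the fix: run_eval previously forwarded whatever model string the candidate carried, so a Llama model alias was sent verbatim to the NVIDIA evaluator; now the alias is resolved to the provider model ID first. A self-contained sketch of the request body it builds, where the helper and the alias mapping are illustrative assumptions:

# Sketch of the request body run_eval now builds (names and mapping assumed).
DEFAULT_NAMESPACE = "nvidia"

def get_provider_model_id(model: str) -> str:
    # Stand-in for ModelRegistryHelper.get_provider_model_id.
    return {"Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct"}.get(model, model)

def build_eval_job_body(benchmark_id: str, model: str) -> dict:
    nvidia_model = get_provider_model_id(model)  # alias -> NVIDIA model ID
    return {
        "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
        "target": {"type": "model", "model": nvidia_model},
    }

# "mmlu" is a hypothetical benchmark ID used only for this example.
body = build_eval_job_body("mmlu", "Llama3.1-8B-Instruct")
assert body["target"]["model"] == "meta/llama-3.1-8b-instruct"
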

Unit tests for the NVIDIA eval provider:
@@ -13,6 +13,7 @@ import pytest
 from llama_stack.apis.benchmarks import Benchmark
 from llama_stack.apis.common.job_types import Job, JobStatus
 from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
+from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
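
For reference, the alias the test constructs below comes from the CoreModelId enum; a quick snippet, where the exact enum string is my assumption:

from llama_stack.models.llama.sku_types import CoreModelId

# The member's .value is the canonical Llama model name; assumed to be
# "Llama3.1-8B-Instruct" here.
print(CoreModelId.llama3_1_8b_instruct.value)
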
@@ -121,7 +122,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         benchmark_config = BenchmarkConfig(
             eval_candidate=ModelCandidate(
                 type="model",
-                model="meta/llama-3.1-8b-instruct",
+                model=CoreModelId.llama3_1_8b_instruct.value,
                 sampling_params=SamplingParams(max_tokens=100, temperature=0.7),
             )
         )
@@ -140,7 +141,7 @@ class TestNVIDIAEvalImpl(unittest.TestCase):
         self._assert_request_body(
             {
                 "config": f"nvidia/{MOCK_BENCHMARK_ID}",
-                "target": {"type": "model", "model": benchmark_config.eval_candidate.model},
+                "target": {"type": "model", "model": "meta/llama-3.1-8b-instruct"},
             }
         )
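
The two test changes work together: the candidate is now built from the CoreModelId alias (previous hunk), while the request-body assertion pins the literal NVIDIA ID rather than echoing benchmark_config.eval_candidate.model, which proves run_eval actually performed the conversion. A condensed rendering of that expectation, with the mapping assumed:

# Condensed version of the updated test expectation (illustrative).
def resolve(model: str) -> str:
    # Stand-in for NVIDIAEvalImpl.get_provider_model_id (mapping assumed).
    return {"Llama3.1-8B-Instruct": "meta/llama-3.1-8b-instruct"}.get(model, model)

candidate_model = "Llama3.1-8B-Instruct"  # assumed CoreModelId.llama3_1_8b_instruct.value
body = {"target": {"type": "model", "model": resolve(candidate_model)}}
assert body["target"]["model"] == "meta/llama-3.1-8b-instruct"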