mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
# What does this PR do?

- remove model registration & parameterize model in scoring/eval pytests

## Test Plan

```
pytest -v -s -m meta_reference_eval_together_inference eval/test_eval.py
pytest -v -s -m meta_reference_eval_together_inference_huggingface_datasetio eval/test_eval.py
```

```
pytest -v -s -m llm_as_judge_scoring_together_inference scoring/test_scoring.py --judge-model meta-llama/Llama-3.2-3B-Instruct
pytest -v -s -m basic_scoring_together_inference scoring/test_scoring.py
```

<img width="860" alt="image" src="https://github.com/user-attachments/assets/d4b0badc-da34-4097-9b7c-9511f8261723" />

## Sources

Please link relevant resources if necessary.

## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
78 lines
1.9 KiB
Python
78 lines
1.9 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
|
|
from llama_stack.distribution.datatypes import Api, ModelInput, Provider
|
|
|
|
from llama_stack.providers.tests.resolver import construct_stack_for_test
|
|
from ..conftest import ProviderFixture, remote_stack_fixture
|
|
|
|
|
|
@pytest.fixture(scope="session")
def eval_remote() -> ProviderFixture:
    """Return the provider fixture produced by ``remote_stack_fixture()``.

    The fixture is session-scoped, so the value is created once per test
    session and shared by every test that requests it.
    """
    remote_fixture = remote_stack_fixture()
    return remote_fixture
|
|
|
|
|
|
@pytest.fixture(scope="session")
def eval_meta_reference() -> ProviderFixture:
    """Return a provider fixture holding the inline meta-reference eval provider.

    The single provider is registered under the id ``"meta-reference"`` with
    type ``"inline::meta-reference"`` and an empty configuration.
    """
    meta_reference_provider = Provider(
        provider_id="meta-reference",
        provider_type="inline::meta-reference",
        config={},
    )
    return ProviderFixture(providers=[meta_reference_provider])
|
|
|
|
|
|
# Suffixes of the eval provider fixtures defined in this module
# (``eval_meta_reference`` and ``eval_remote``).
EVAL_FIXTURES = [
    "meta_reference",
    "remote",
]
|
|
|
|
|
|
@pytest_asyncio.fixture(scope="session")
async def eval_stack(request, inference_model, judge_model):
    """Build the stack used by the eval tests and return its implementations.

    ``request.param`` is indexed by API name ("datasetio", "eval", "scoring",
    "inference", "agents", "safety", "memory"); each value is the suffix of
    the provider fixture to use, so the fixture named ``"{api}_{suffix}"`` is
    resolved through ``request.getfixturevalue``.

    Args:
        request: The pytest request object; its ``param`` selects the
            provider fixture for every API.
        inference_model: Model id passed to the stack as a ``ModelInput``.
        judge_model: Model id passed to the stack as a ``ModelInput``.

    Returns:
        The ``impls`` attribute of the stack built by
        ``construct_stack_for_test``.
    """
    selection = request.param
    api_keys = (
        "datasetio",
        "eval",
        "scoring",
        "inference",
        "agents",
        "safety",
        "memory",
    )

    providers = {}
    provider_data = {}
    for api_key in api_keys:
        provider_fixture = request.getfixturevalue(f"{api_key}_{selection[api_key]}")
        providers[api_key] = provider_fixture.providers
        # Only merge provider data for fixtures that actually carry some.
        if provider_fixture.provider_data:
            provider_data.update(provider_fixture.provider_data)

    apis = [
        Api.eval,
        Api.datasetio,
        Api.inference,
        Api.scoring,
        Api.agents,
        Api.safety,
        Api.memory,
    ]
    # Both the inference model and the judge model are handed to the stack.
    models = [
        ModelInput(model_id=model_id)
        for model_id in (inference_model, judge_model)
    ]

    test_stack = await construct_stack_for_test(
        apis,
        providers,
        provider_data,
        models=models,
    )
    return test_stack.impls
|