rename evals related stuff

Author: Xi Yan
Date: 2024-11-11 15:11:07 -05:00
parent 2b7d70ba86
commit acd055d763
31 changed files with 21 additions and 41 deletions
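The diff below applies one mechanical rename: inline provider packages move from llama_stack.providers.inline.<provider>.<api> to llama_stack.providers.inline.<api>.<provider>, so inline.meta_reference.eval becomes inline.eval.meta_reference and inline.braintrust.scoring becomes inline.scoring.braintrust. A minimal sketch of the transform as a hypothetical helper (illustrative only, not code from this commit):

    def renamed_module(old: str) -> str:
        """Swap the last two package components under providers.inline."""
        prefix = "llama_stack.providers.inline."
        assert old.startswith(prefix)
        provider, api = old[len(prefix):].split(".")
        return f"{prefix}{api}.{provider}"

    assert renamed_module("llama_stack.providers.inline.meta_reference.eval") == (
        "llama_stack.providers.inline.eval.meta_reference"
    )
    assert renamed_module("llama_stack.providers.inline.braintrust.scoring") == (
        "llama_stack.providers.inline.scoring.braintrust"
    )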

@@ -9,14 +9,13 @@ from llama_models.llama3.api.datatypes import *  # noqa: F403
 from .....apis.common.job_types import Job
 from .....apis.eval.eval import Eval, EvalTaskConfig, EvaluateResponse, JobStatus
 from llama_stack.apis.common.type_system import *  # noqa: F403
-from tqdm import tqdm
 from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.eval_tasks import EvalTaskDef
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.scoring import Scoring
 from llama_stack.providers.datatypes import EvalTasksProtocolPrivate
+from tqdm import tqdm
 from .config import MetaReferenceEvalConfig

@@ -16,9 +16,8 @@ from llama_stack.apis.datasets import *  # noqa: F403
 from autoevals.llm import Factuality
 from autoevals.ragas import AnswerCorrectness
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
-    aggregate_average,
-)
+from ..meta_reference.scoring.scoring_fn.common import aggregate_average
 from .config import BraintrustScoringConfig
 from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def

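The Braintrust scorer now reaches the meta-reference helpers through a package-relative import. Python resolves such dotted forms against the importing module's package; importlib.util.resolve_name shows the mechanics. The package name below is an assumption based on the registry entry renamed later in this diff, and resolve_name only rewrites the string, it does not verify that the target module exists:

    from importlib.util import resolve_name

    # Assumed package of the importing module (per the registry change below).
    pkg = "llama_stack.providers.inline.scoring.braintrust"
    print(resolve_name("..meta_reference.scoring.scoring_fn.common", pkg))
    # llama_stack.providers.inline.scoring.meta_reference.scoring.scoring_fn.common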

@@ -4,20 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
-    BaseScoringFn,
-)
+from .base_scoring_fn import BaseScoringFn
 from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 from llama_stack.apis.common.type_system import *  # noqa: F403
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
-    aggregate_accuracy,
-)
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.equality import (
-    equality,
-)
+from .common import aggregate_accuracy
+from .fn_defs.equality import equality
 class EqualityScoringFn(BaseScoringFn):

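For context, equality scoring with accuracy aggregation boils down to an exact-match check averaged over rows. A self-contained sketch of that computation (illustrative only; this is not the llama_stack BaseScoringFn interface):

    def equality_score(expected: str, generated: str) -> float:
        # Exact string match: 1.0 for a hit, 0.0 otherwise.
        return 1.0 if expected == generated else 0.0

    def aggregate_accuracy(scores: list[float]) -> dict:
        # Mean of the per-row 0/1 scores, plus raw counts.
        return {
            "accuracy": sum(scores) / len(scores),
            "num_correct": sum(scores),
            "num_total": len(scores),
        }

    rows = [("4", "4"), ("Paris", "paris")]
    print(aggregate_accuracy([equality_score(e, g) for e, g in rows]))
    # {'accuracy': 0.5, 'num_correct': 1.0, 'num_total': 2}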

@@ -4,20 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from llama_stack.apis.inference.inference import Inference
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
-    BaseScoringFn,
-)
+from .base_scoring_fn import BaseScoringFn
 from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 from llama_stack.apis.common.type_system import *  # noqa: F403
 import re
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
-    aggregate_average,
-)
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
-    llm_as_judge_8b_correctness,
-)
+from .common import aggregate_average
+from .fn_defs.llm_as_judge_8b_correctness import llm_as_judge_8b_correctness
 class LlmAsJudgeScoringFn(BaseScoringFn):

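The imports above (re plus aggregate_average) hint at the usual LLM-as-judge flow: send a judge model a grading prompt, parse a numeric verdict out of its free-form reply, then average across rows. A rough sketch of the parsing step; the verdict format and regex here are invented for illustration, not taken from this commit:

    import re

    # Invented verdict format, e.g. a judge reply ending in "Answer: <score>".
    JUDGE_PATTERN = re.compile(r"Answer:\s*(\d+)", re.IGNORECASE)

    def parse_judge_score(judge_reply: str) -> float | None:
        match = JUDGE_PATTERN.search(judge_reply)
        return float(match.group(1)) if match else None

    print(parse_judge_score("The response is mostly right. Answer: 4"))  # 4.0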

@@ -4,19 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
-    BaseScoringFn,
-)
+from .base_scoring_fn import BaseScoringFn
 from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 from llama_stack.apis.common.type_system import *  # noqa: F403
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
-    aggregate_accuracy,
-)
+from .common import aggregate_accuracy
-from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
-    subset_of,
-)
+from .fn_defs.subset_of import subset_of
 class SubsetOfScoringFn(BaseScoringFn):

@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.datasetio,
             provider_type="meta-reference",
             pip_packages=["pandas"],
-            module="llama_stack.providers.inline.meta_reference.datasetio",
-            config_class="llama_stack.providers.inline.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
+            module="llama_stack.providers.inline.datasetio.meta_reference",
+            config_class="llama_stack.providers.inline.datasetio.meta_reference.MetaReferenceDatasetIOConfig",
             api_dependencies=[],
         ),
         remote_provider_spec(

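These registry entries identify providers by dotted strings rather than direct imports, which is why the rename only needs string edits here and in the hunks that follow. A minimal sketch of how such a config_class string can be resolved at runtime, assuming importlib-style loading (this helper is illustrative, not llama_stack's actual loader):

    import importlib

    def load_symbol(dotted: str):
        # Split "pkg.module.ClassName" into module path and attribute name,
        # import the module, then fetch the attribute off it.
        module_name, attr = dotted.rsplit(".", 1)
        return getattr(importlib.import_module(module_name), attr)

    # e.g. ConfigCls = load_symbol(
    #     "llama_stack.providers.inline.datasetio.meta_reference.MetaReferenceDatasetIOConfig"
    # )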

@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.eval,
             provider_type="meta-reference",
             pip_packages=[],
-            module="llama_stack.providers.inline.meta_reference.eval",
-            config_class="llama_stack.providers.inline.meta_reference.eval.MetaReferenceEvalConfig",
+            module="llama_stack.providers.inline.eval.meta_reference",
+            config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,

@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.scoring,
             provider_type="meta-reference",
             pip_packages=[],
-            module="llama_stack.providers.inline.meta_reference.scoring",
-            config_class="llama_stack.providers.inline.meta_reference.scoring.MetaReferenceScoringConfig",
+            module="llama_stack.providers.inline.scoring.meta_reference",
+            config_class="llama_stack.providers.inline.scoring.meta_reference.MetaReferenceScoringConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,
@@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.scoring,
             provider_type="braintrust",
             pip_packages=["autoevals", "openai"],
-            module="llama_stack.providers.inline.braintrust.scoring",
-            config_class="llama_stack.providers.inline.braintrust.scoring.BraintrustScoringConfig",
+            module="llama_stack.providers.inline.scoring.braintrust",
+            config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,