diff --git a/llama_stack/providers/inline/meta_reference/datasetio/__init__.py b/llama_stack/providers/inline/datasetio/localfs/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/datasetio/__init__.py rename to llama_stack/providers/inline/datasetio/localfs/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/datasetio/config.py b/llama_stack/providers/inline/datasetio/localfs/config.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/datasetio/config.py rename to llama_stack/providers/inline/datasetio/localfs/config.py diff --git a/llama_stack/providers/inline/meta_reference/datasetio/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/datasetio/datasetio.py rename to llama_stack/providers/inline/datasetio/localfs/datasetio.py diff --git a/llama_stack/providers/inline/meta_reference/eval/__init__.py b/llama_stack/providers/inline/eval/meta_reference/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/eval/__init__.py rename to llama_stack/providers/inline/eval/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/eval/config.py b/llama_stack/providers/inline/eval/meta_reference/config.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/eval/config.py rename to llama_stack/providers/inline/eval/meta_reference/config.py diff --git a/llama_stack/providers/inline/meta_reference/eval/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py similarity index 99% rename from llama_stack/providers/inline/meta_reference/eval/eval.py rename to llama_stack/providers/inline/eval/meta_reference/eval.py index 48d8e2b04..df642f33b 100644 --- a/llama_stack/providers/inline/meta_reference/eval/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -9,14 +9,13 @@ from 
llama_models.llama3.api.datatypes import * # noqa: F403 from .....apis.common.job_types import Job from .....apis.eval.eval import Eval, EvalTaskConfig, EvaluateResponse, JobStatus from llama_stack.apis.common.type_system import * # noqa: F403 -from tqdm import tqdm - from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.eval_tasks import EvalTaskDef from llama_stack.apis.inference import Inference from llama_stack.apis.scoring import Scoring from llama_stack.providers.datatypes import EvalTasksProtocolPrivate +from tqdm import tqdm from .config import MetaReferenceEvalConfig diff --git a/llama_stack/providers/inline/braintrust/scoring/__init__.py b/llama_stack/providers/inline/scoring/braintrust/__init__.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/__init__.py rename to llama_stack/providers/inline/scoring/braintrust/__init__.py diff --git a/llama_stack/providers/inline/braintrust/scoring/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py similarity index 98% rename from llama_stack/providers/inline/braintrust/scoring/braintrust.py rename to llama_stack/providers/inline/scoring/braintrust/braintrust.py index 6488a63eb..b0683dd04 100644 --- a/llama_stack/providers/inline/braintrust/scoring/braintrust.py +++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -16,9 +16,8 @@ from llama_stack.apis.datasets import * # noqa: F403 from autoevals.llm import Factuality from autoevals.ragas import AnswerCorrectness from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import ( - aggregate_average, -) + +from ..meta_reference.scoring_fn.common import aggregate_average from .config import BraintrustScoringConfig from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def diff --git 
a/llama_stack/providers/inline/braintrust/scoring/config.py b/llama_stack/providers/inline/scoring/braintrust/config.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/config.py rename to llama_stack/providers/inline/scoring/braintrust/config.py diff --git a/llama_stack/providers/inline/braintrust/scoring/scoring_fn/__init__.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/scoring_fn/__init__.py rename to llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py diff --git a/llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/__init__.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/__init__.py rename to llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py diff --git a/llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/answer_correctness.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/answer_correctness.py rename to llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py diff --git a/llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/factuality.py b/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py similarity index 100% rename from llama_stack/providers/inline/braintrust/scoring/scoring_fn/fn_defs/factuality.py rename to llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/__init__.py b/llama_stack/providers/inline/scoring/meta_reference/__init__.py similarity index 100% rename from 
llama_stack/providers/inline/meta_reference/scoring/__init__.py rename to llama_stack/providers/inline/scoring/meta_reference/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/config.py b/llama_stack/providers/inline/scoring/meta_reference/config.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/config.py rename to llama_stack/providers/inline/scoring/meta_reference/config.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring.py b/llama_stack/providers/inline/scoring/meta_reference/scoring.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/__init__.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/__init__.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/base_scoring_fn.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/base_scoring_fn.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/base_scoring_fn.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/base_scoring_fn.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/common.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/common.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/common.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/common.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/equality_scoring_fn.py 
b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/equality_scoring_fn.py similarity index 82% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/equality_scoring_fn.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/equality_scoring_fn.py index 07405d56c..89e516663 100644 --- a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/equality_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/equality_scoring_fn.py @@ -4,20 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import ( - BaseScoringFn, -) +from .base_scoring_fn import BaseScoringFn from llama_stack.apis.scoring_functions import * # noqa: F401, F403 from llama_stack.apis.scoring import * # noqa: F401, F403 from llama_stack.apis.common.type_system import * # noqa: F403 -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import ( - aggregate_accuracy, -) - -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.equality import ( - equality, -) +from .common import aggregate_accuracy +from .fn_defs.equality import equality class EqualityScoringFn(BaseScoringFn): diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/__init__.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/__init__.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/__init__.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/__init__.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/equality.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/equality.py similarity index 100% rename from 
llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/equality.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/equality.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/llm_as_judge_8b_correctness.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_8b_correctness.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/llm_as_judge_8b_correctness.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_8b_correctness.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/subset_of.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/subset_of.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/fn_defs/subset_of.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/subset_of.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/llm_as_judge_scoring_fn.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/llm_as_judge_scoring_fn.py similarity index 88% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/llm_as_judge_scoring_fn.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/llm_as_judge_scoring_fn.py index f98f7fb5e..24bdc6400 100644 --- 
a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/llm_as_judge_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/llm_as_judge_scoring_fn.py @@ -4,20 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. from llama_stack.apis.inference.inference import Inference -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import ( - BaseScoringFn, -) + +from .base_scoring_fn import BaseScoringFn from llama_stack.apis.scoring_functions import * # noqa: F401, F403 from llama_stack.apis.scoring import * # noqa: F401, F403 from llama_stack.apis.common.type_system import * # noqa: F403 import re -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import ( - aggregate_average, -) -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import ( - llm_as_judge_8b_correctness, -) +from .common import aggregate_average +from .fn_defs.llm_as_judge_8b_correctness import llm_as_judge_8b_correctness class LlmAsJudgeScoringFn(BaseScoringFn): diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/regex_parser_scoring_fn.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/regex_parser_scoring_fn.py similarity index 100% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/regex_parser_scoring_fn.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/regex_parser_scoring_fn.py diff --git a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/subset_of_scoring_fn.py similarity index 80% rename from llama_stack/providers/inline/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py rename to llama_stack/providers/inline/scoring/meta_reference/scoring_fn/subset_of_scoring_fn.py index 
289c63dd7..d484e182c 100644 --- a/llama_stack/providers/inline/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py +++ b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/subset_of_scoring_fn.py @@ -4,19 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import ( - BaseScoringFn, -) +from .base_scoring_fn import BaseScoringFn from llama_stack.apis.scoring_functions import * # noqa: F401, F403 from llama_stack.apis.scoring import * # noqa: F401, F403 from llama_stack.apis.common.type_system import * # noqa: F403 -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import ( - aggregate_accuracy, -) +from .common import aggregate_accuracy -from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.subset_of import ( - subset_of, -) +from .fn_defs.subset_of import subset_of class SubsetOfScoringFn(BaseScoringFn): diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py index 3fdeac997..895508609 100644 --- a/llama_stack/providers/registry/datasetio.py +++ b/llama_stack/providers/registry/datasetio.py @@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]: api=Api.datasetio, provider_type="meta-reference", pip_packages=["pandas"], - module="llama_stack.providers.inline.meta_reference.datasetio", - config_class="llama_stack.providers.inline.meta_reference.datasetio.MetaReferenceDatasetIOConfig", + module="llama_stack.providers.inline.datasetio.localfs", + config_class="llama_stack.providers.inline.datasetio.localfs.MetaReferenceDatasetIOConfig", api_dependencies=[], ), remote_provider_spec( diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py index 9b9ba6409..275cc92db 100644 --- a/llama_stack/providers/registry/eval.py +++ 
b/llama_stack/providers/registry/eval.py @@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]: api=Api.eval, provider_type="meta-reference", pip_packages=[], - module="llama_stack.providers.inline.meta_reference.eval", - config_class="llama_stack.providers.inline.meta_reference.eval.MetaReferenceEvalConfig", + module="llama_stack.providers.inline.eval.meta_reference", + config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig", api_dependencies=[ Api.datasetio, Api.datasets, diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 2586083f6..70f43ad73 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]: api=Api.scoring, provider_type="meta-reference", pip_packages=[], - module="llama_stack.providers.inline.meta_reference.scoring", - config_class="llama_stack.providers.inline.meta_reference.scoring.MetaReferenceScoringConfig", + module="llama_stack.providers.inline.scoring.meta_reference", + config_class="llama_stack.providers.inline.scoring.meta_reference.MetaReferenceScoringConfig", api_dependencies=[ Api.datasetio, Api.datasets, @@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]: api=Api.scoring, provider_type="braintrust", pip_packages=["autoevals", "openai"], - module="llama_stack.providers.inline.braintrust.scoring", - config_class="llama_stack.providers.inline.braintrust.scoring.BraintrustScoringConfig", + module="llama_stack.providers.inline.scoring.braintrust", + config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig", api_dependencies=[ Api.datasetio, Api.datasets,