Make input_query an optional input for the Braintrust scorer

This commit is contained in:
Xi Yan 2024-10-14 21:17:16 -07:00
parent 7b5895003a
commit 3c29108b6e
4 changed files with 8 additions and 3 deletions

View file

@@ -118,7 +118,7 @@ async def run_main(host: str, port: int):
response = await client.run_scorer(
dataset_config=EvaluateDatasetConfig(
dataset_identifier="Llama-3.1-8B-Instruct-evals__mmlu_pro__details",
-# row_limit=10,
+row_limit=10,
),
eval_scoring_config=EvaluateScoringConfig(
scorer_config_list=[

View file

@@ -16,7 +16,7 @@ ScorerRegistry = Registry[BaseScorer]()
SCORER_REGISTRY = {
"accuracy": AccuracyScorer,
"random": RandomScorer,
-"braintrust::factuality": BrainTrustFactualityScorer,
+"braintrust::factuality": BraintrustFactualityScorer,
"braintrust::answer-correctness": BraintrustAnswerCorrectnessScorer,
}

View file

@@ -33,11 +33,15 @@ class RunScoringTask(BaseTask):
for x in dataset:
expected_answer = x.data["expected_answer"]
generated_answer = x.data["generated_answer"]
input_query = None
if "input_query" in x.data:
input_query = x.data["input_query"]
scorer_inputs.append(
ScorerInputSample(
expected_answer=expected_answer,
generated_answer=generated_answer,
input_query=input_query,
)
)
@@ -74,7 +78,6 @@ class RunScoringTask(BaseTask):
)
scorer_results = scorer.score(postprocessed)
cprint(scorer_results, "magenta")
eval_result = scorer.aggregate_results(scorer_results)
return eval_result

View file

@@ -20,6 +20,8 @@ def available_providers() -> List[ProviderSpec]:
"pandas",
"scikit-learn",
"datasets",
"numpy",
"autoevals",
],
module="llama_stack.providers.impls.meta_reference.evals",
config_class="llama_stack.providers.impls.meta_reference.evals.MetaReferenceEvalsImplConfig",