Xi Yan 2024-11-18 11:42:02 -08:00
parent 0d8de1c768
commit ba5d755848
3 changed files with 5 additions and 5 deletions


@@ -133,7 +133,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
         self, input_rows: List[Dict[str, Any]], task_config: EvalTaskConfig
     ) -> List[Dict[str, Any]]:
         candidate = task_config.eval_candidate
-        create_response = await self.agent_api.create_agent(candidate.config)
+        create_response = await self.agents_api.create_agent(candidate.config)
         agent_id = create_response.agent_id
 
         generations = []
@@ -143,7 +143,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
             input_messages = [UserMessage(**x) for x in input_messages]
 
             # NOTE: only single-turn agent generation is supported. Create a new session for each input row
-            session_create_response = await self.agent_api.create_agent_session(
+            session_create_response = await self.agents_api.create_agent_session(
                 agent_id, f"session-{i}"
             )
             session_id = session_create_response.session_id
@@ -156,7 +156,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
             )
             turn_response = [
                 chunk
-                async for chunk in await self.agent_api.create_agent_turn(
+                async for chunk in await self.agents_api.create_agent_turn(
                     **turn_request
                 )
             ]
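For context, the renamed attribute is exercised in a single-turn loop: one agent is created per eval run, and a fresh session is created per input row so no conversation state leaks between rows. A minimal sketch of that flow, assuming the Agents API methods shown in the hunks above; the import path for UserMessage, the row key "messages", and the turn_request fields are assumptions (the diff only shows **turn_request being splatted into create_agent_turn):

from typing import Any, Dict, List

from llama_stack.apis.inference import UserMessage  # assumed import path


async def run_agent_generation(
    agents_api, agent_config, input_rows: List[Dict[str, Any]]
) -> List[List[Any]]:
    # One agent for the whole run.
    create_response = await agents_api.create_agent(agent_config)
    agent_id = create_response.agent_id

    generations = []
    for i, row in enumerate(input_rows):
        # Only single-turn generation is supported, so each row gets a
        # fresh session and no state carries over between rows.
        session_create_response = await agents_api.create_agent_session(
            agent_id, f"session-{i}"
        )
        session_id = session_create_response.session_id

        # Hypothetical turn_request fields; only the splat is visible in the diff.
        turn_request = dict(
            agent_id=agent_id,
            session_id=session_id,
            messages=[UserMessage(**m) for m in row["messages"]],  # assumed row key
            stream=True,
        )
        turn_response = [
            chunk
            async for chunk in await agents_api.create_agent_turn(**turn_request)
        ]
        generations.append(turn_response)
    return generations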


@@ -78,7 +78,7 @@ Just return the letters "A", "B", or "C", with no text around it.
 """
 
 llm_as_judge_405b_simpleqa = ScoringFn(
-    identifier="llm-as-judge::llm_as_judge_405b_simpleqa",
+    identifier="llm-as-judge::405b-simpleqa",
     description="Llm As Judge Scoring Function for SimpleQA Benchmark (https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py)",
     return_type=NumberType(),
     provider_id="llm-as-judge",


@@ -9,7 +9,7 @@ from llama_stack.apis.scoring_functions import ScoringFn
 
 
 llm_as_judge_base = ScoringFn(
-    identifier="llm-as-judge::llm_as_judge_base",
+    identifier="llm-as-judge::base",
     description="Llm As Judge Scoring Function",
     return_type=NumberType(),
     provider_id="llm-as-judge",
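The identifier changes in the last two files follow one pattern: the provider prefix llm-as-judge:: already scopes the name, so the redundant llm_as_judge_ stem is dropped, giving llm-as-judge::base and llm-as-judge::405b-simpleqa. A minimal sketch of the resulting definition and of a caller-side reference, assuming NumberType is importable from llama_stack.apis.common.type_system (only the ScoringFn import is visible in the hunk header):

from llama_stack.apis.common.type_system import NumberType  # assumed import path
from llama_stack.apis.scoring_functions import ScoringFn

llm_as_judge_base = ScoringFn(
    identifier="llm-as-judge::base",  # was "llm-as-judge::llm_as_judge_base"
    description="Llm As Judge Scoring Function",
    return_type=NumberType(),  # the judge produces a numeric score
    provider_id="llm-as-judge",
)

# Any eval config that referenced the old identifiers must switch to the
# new form, e.g. (hypothetical config snippet):
scoring_functions = ["llm-as-judge::base", "llm-as-judge::405b-simpleqa"]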