mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-31 16:01:46 +00:00
rename
This commit is contained in:
parent
0d8de1c768
commit
ba5d755848
3 changed files with 5 additions and 5 deletions
|
@ -133,7 +133,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
|||
self, input_rows: List[Dict[str, Any]], task_config: EvalTaskConfig
|
||||
) -> List[Dict[str, Any]]:
|
||||
candidate = task_config.eval_candidate
|
||||
create_response = await self.agent_api.create_agent(candidate.config)
|
||||
create_response = await self.agents_api.create_agent(candidate.config)
|
||||
agent_id = create_response.agent_id
|
||||
|
||||
generations = []
|
||||
|
@ -143,7 +143,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
|||
input_messages = [UserMessage(**x) for x in input_messages]
|
||||
|
||||
# NOTE: only single-turn agent generation is supported. Create a new session for each input row
|
||||
session_create_response = await self.agent_api.create_agent_session(
|
||||
session_create_response = await self.agents_api.create_agent_session(
|
||||
agent_id, f"session-{i}"
|
||||
)
|
||||
session_id = session_create_response.session_id
|
||||
|
@ -156,7 +156,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
|||
)
|
||||
turn_response = [
|
||||
chunk
|
||||
async for chunk in await self.agent_api.create_agent_turn(
|
||||
async for chunk in await self.agents_api.create_agent_turn(
|
||||
**turn_request
|
||||
)
|
||||
]
|
||||
|
|
|
@ -78,7 +78,7 @@ Just return the letters "A", "B", or "C", with no text around it.
|
|||
|
||||
|
||||
llm_as_judge_405b_simpleqa = ScoringFn(
|
||||
identifier="llm-as-judge::llm_as_judge_405b_simpleqa",
|
||||
identifier="llm-as-judge::405b-simpleqa",
|
||||
description="Llm As Judge Scoring Function for SimpleQA Benchmark (https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py)",
|
||||
return_type=NumberType(),
|
||||
provider_id="llm-as-judge",
|
||||
|
|
|
@ -9,7 +9,7 @@ from llama_stack.apis.scoring_functions import ScoringFn
|
|||
|
||||
|
||||
llm_as_judge_base = ScoringFn(
|
||||
identifier="llm-as-judge::llm_as_judge_base",
|
||||
identifier="llm-as-judge::base",
|
||||
description="Llm As Judge Scoring Function",
|
||||
return_type=NumberType(),
|
||||
provider_id="llm-as-judge",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue