mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-01 16:24:44 +00:00
rename
This commit is contained in:
parent
0d8de1c768
commit
ba5d755848
3 changed files with 5 additions and 5 deletions
|
@ -133,7 +133,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
||||||
self, input_rows: List[Dict[str, Any]], task_config: EvalTaskConfig
|
self, input_rows: List[Dict[str, Any]], task_config: EvalTaskConfig
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
candidate = task_config.eval_candidate
|
candidate = task_config.eval_candidate
|
||||||
create_response = await self.agent_api.create_agent(candidate.config)
|
create_response = await self.agents_api.create_agent(candidate.config)
|
||||||
agent_id = create_response.agent_id
|
agent_id = create_response.agent_id
|
||||||
|
|
||||||
generations = []
|
generations = []
|
||||||
|
@ -143,7 +143,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
||||||
input_messages = [UserMessage(**x) for x in input_messages]
|
input_messages = [UserMessage(**x) for x in input_messages]
|
||||||
|
|
||||||
# NOTE: only single-turn agent generation is supported. Create a new session for each input row
|
# NOTE: only single-turn agent generation is supported. Create a new session for each input row
|
||||||
session_create_response = await self.agent_api.create_agent_session(
|
session_create_response = await self.agents_api.create_agent_session(
|
||||||
agent_id, f"session-{i}"
|
agent_id, f"session-{i}"
|
||||||
)
|
)
|
||||||
session_id = session_create_response.session_id
|
session_id = session_create_response.session_id
|
||||||
|
@ -156,7 +156,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate):
|
||||||
)
|
)
|
||||||
turn_response = [
|
turn_response = [
|
||||||
chunk
|
chunk
|
||||||
async for chunk in await self.agent_api.create_agent_turn(
|
async for chunk in await self.agents_api.create_agent_turn(
|
||||||
**turn_request
|
**turn_request
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
|
@ -78,7 +78,7 @@ Just return the letters "A", "B", or "C", with no text around it.
|
||||||
|
|
||||||
|
|
||||||
llm_as_judge_405b_simpleqa = ScoringFn(
|
llm_as_judge_405b_simpleqa = ScoringFn(
|
||||||
identifier="llm-as-judge::llm_as_judge_405b_simpleqa",
|
identifier="llm-as-judge::405b-simpleqa",
|
||||||
description="Llm As Judge Scoring Function for SimpleQA Benchmark (https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py)",
|
description="Llm As Judge Scoring Function for SimpleQA Benchmark (https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py)",
|
||||||
return_type=NumberType(),
|
return_type=NumberType(),
|
||||||
provider_id="llm-as-judge",
|
provider_id="llm-as-judge",
|
||||||
|
|
|
@ -9,7 +9,7 @@ from llama_stack.apis.scoring_functions import ScoringFn
|
||||||
|
|
||||||
|
|
||||||
llm_as_judge_base = ScoringFn(
|
llm_as_judge_base = ScoringFn(
|
||||||
identifier="llm-as-judge::llm_as_judge_base",
|
identifier="llm-as-judge::base",
|
||||||
description="Llm As Judge Scoring Function",
|
description="Llm As Judge Scoring Function",
|
||||||
return_type=NumberType(),
|
return_type=NumberType(),
|
||||||
provider_id="llm-as-judge",
|
provider_id="llm-as-judge",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue