From ba5d755848c31613330ae1ce2e59e6bc0a707780 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 18 Nov 2024 11:42:02 -0800 Subject: [PATCH] rename --- llama_stack/providers/inline/eval/meta_reference/eval.py | 6 +++--- .../scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py | 2 +- .../llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 7c3c27c77..d1df869b4 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -133,7 +133,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): self, input_rows: List[Dict[str, Any]], task_config: EvalTaskConfig ) -> List[Dict[str, Any]]: candidate = task_config.eval_candidate - create_response = await self.agent_api.create_agent(candidate.config) + create_response = await self.agents_api.create_agent(candidate.config) agent_id = create_response.agent_id generations = [] @@ -143,7 +143,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): input_messages = [UserMessage(**x) for x in input_messages] # NOTE: only single-turn agent generation is supported. Create a new session for each input row - session_create_response = await self.agent_api.create_agent_session( + session_create_response = await self.agents_api.create_agent_session( agent_id, f"session-{i}" ) session_id = session_create_response.session_id @@ -156,7 +156,7 @@ class MetaReferenceEvalImpl(Eval, EvalTasksProtocolPrivate): ) turn_response = [ chunk - async for chunk in await self.agent_api.create_agent_turn( + async for chunk in await self.agents_api.create_agent_turn( **turn_request ) ] diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py index c567b3414..8ed501099 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py @@ -78,7 +78,7 @@ Just return the letters "A", "B", or "C", with no text around it. llm_as_judge_405b_simpleqa = ScoringFn( - identifier="llm-as-judge::llm_as_judge_405b_simpleqa", + identifier="llm-as-judge::405b-simpleqa", description="Llm As Judge Scoring Function for SimpleQA Benchmark (https://github.com/openai/simple-evals/blob/main/simpleqa_eval.py)", return_type=NumberType(), provider_id="llm-as-judge", diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py index 51517a0b0..b00b9a7db 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py @@ -9,7 +9,7 @@ from llama_stack.apis.scoring_functions import ScoringFn llm_as_judge_base = ScoringFn( - identifier="llm-as-judge::llm_as_judge_base", + identifier="llm-as-judge::base", description="Llm As Judge Scoring Function", return_type=NumberType(), provider_id="llm-as-judge",