From b1ebc837f80c527d679e70eb4234a54881b28beb Mon Sep 17 00:00:00 2001
From: Xi Yan <xiyan@meta.com>
Date: Mon, 11 Nov 2024 15:49:18 -0500
Subject: [PATCH] refactor scoring: drop commented-out judge prompt and params

---
 .../scoring_fn/fn_defs/llm_as_judge_base.py   | 23 -------------------
 1 file changed, 23 deletions(-)

diff --git a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py
index f7de54f46..171e09def 100644
--- a/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/llama_stack/providers/inline/scoring/meta_reference/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -8,32 +8,9 @@ from llama_stack.apis.scoring_functions import *  # noqa: F401, F403
 from llama_stack.apis.scoring import *  # noqa: F401, F403
 from llama_stack.apis.common.type_system import NumberType
 
-# JUDGE_PROMPT = """
-# You will be given a question, a expected_answer, and a system_answer.
-# Your task is to provide a 'total rating' scoring how well the system_answer answers compared with ground truth in expected_answer in terms of factual correctness to the question.
-# Give your answer as a integer on a scale of 0 to 5, where 0 means that the system_answer is not correct at all compared with expected_answer, and 5 means that the answer completely and correctly answers the question.
-# Provide your feedback as follows:
-# Feedback:::
-# Total rating: (your rating, as a int between 0 and 5)
-# Now here are the question, expected_answer, system_answer.
-# Question: {input_query}
-# Expected Answer: {expected_answer}
-# System Answer: {generated_answer}
-# Feedback:::
-# Total rating:
-# """
 
 llm_as_judge_base = ScoringFnDef(
     identifier="meta-reference::llm_as_judge_base",
     description="Llm As Judge Scoring Function",
     return_type=NumberType(),
-    # params=LLMAsJudgeScoringFnParams(
-    #     prompt_template=JUDGE_PROMPT,
-    #     judge_model="Llama3.1-8B-Instruct",
-    #     judge_score_regexes=[
-    #         r"Total rating: (\d+)",
-    #         r"rating: (\d+)",
-    #         r"Rating: (\d+)",
-    #     ],
-    # ),
 )