mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-01 16:24:44 +00:00
extract score regex to llm context
This commit is contained in:
parent
247a53d393
commit
9b410a87bf
3 changed files with 13 additions and 7 deletions
|
@@ -26,6 +26,10 @@ class Parameter(BaseModel):
|
||||||
class LLMAsJudgeContext(BaseModel):
|
class LLMAsJudgeContext(BaseModel):
|
||||||
judge_model: str
|
judge_model: str
|
||||||
prompt_template: Optional[str] = None
|
prompt_template: Optional[str] = None
|
||||||
|
judge_score_regex: Optional[List[str]] = Field(
|
||||||
|
description="Regex to extract the score from the judge response",
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
|
@@ -8,6 +8,7 @@
|
||||||
},
|
},
|
||||||
"context": {
|
"context": {
|
||||||
"judge_model": "Llama3.1-8B-Instruct",
|
"judge_model": "Llama3.1-8B-Instruct",
|
||||||
"prompt_template": "\nYou will be given a question, a expected_answer, and a system_answer.\nYour task is to provide a 'total rating' scoring how well the system_answer answers compared with ground truth in expected_answer in terms of factual correctness to the question.\nGive your answer as a integer on a scale of 0 to 5, where 0 means that the system_answer is not correct at all compared with expected_answer, and 5 means that the answer completely and correctly answers the question.\nProvide your feedback as follows:\nFeedback:::\nTotal rating: (your rating, as a int between 0 and 5)\nNow here are the question, expected_answer, system_answer.\nQuestion: {input_query}\nExpected Answer: {expected_answer}\nSystem Answer: {generated_answer}\nFeedback:::\nTotal rating:\n"
|
"prompt_template": "\nYou will be given a question, a expected_answer, and a system_answer.\nYour task is to provide a 'total rating' scoring how well the system_answer answers compared with ground truth in expected_answer in terms of factual correctness to the question.\nGive your answer as a integer on a scale of 0 to 5, where 0 means that the system_answer is not correct at all compared with expected_answer, and 5 means that the answer completely and correctly answers the question.\nProvide your feedback as follows:\nFeedback:::\nTotal rating: (your rating, as a int between 0 and 5)\nNow here are the question, expected_answer, system_answer.\nQuestion: {input_query}\nExpected Answer: {expected_answer}\nSystem Answer: {generated_answer}\nFeedback:::\nTotal rating:\n",
|
||||||
|
"judge_score_regex": ["Total rating: (\\d+)", "rating: (\\d+)", "Rating: (\\d+)"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -37,8 +37,12 @@ class LlmAsJudgeScoringFn(BaseScoringFn):
|
||||||
scoring_fn_identifier is not None
|
scoring_fn_identifier is not None
|
||||||
), "Scoring function identifier not found."
|
), "Scoring function identifier not found."
|
||||||
fn_def = self.supported_fn_defs_registry[scoring_fn_identifier]
|
fn_def = self.supported_fn_defs_registry[scoring_fn_identifier]
|
||||||
|
assert fn_def.context is not None, f"LLMAsJudgeContext not found for {fn_def}."
|
||||||
assert (
|
assert (
|
||||||
fn_def.context is not None and fn_def.context.prompt_template is not None
|
fn_def.context.prompt_template is not None
|
||||||
|
), "LLM Judge prompt_template not found."
|
||||||
|
assert (
|
||||||
|
fn_def.context.judge_score_regex is not None
|
||||||
), "LLM Judge judge_score_regex not found."
|
), "LLM Judge judge_score_regex not found."
|
||||||
|
|
||||||
input_query = input_row["input_query"]
|
input_query = input_row["input_query"]
|
||||||
|
@@ -61,11 +65,8 @@ class LlmAsJudgeScoringFn(BaseScoringFn):
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
content = judge_response.completion_message.content
|
content = judge_response.completion_message.content
|
||||||
rating_regexs = [
|
rating_regexs = fn_def.context.judge_score_regex
|
||||||
r"Total rating: (\d+)",
|
|
||||||
r"rating: (\d+)",
|
|
||||||
r"Rating: (\d+)",
|
|
||||||
]
|
|
||||||
judge_rating = None
|
judge_rating = None
|
||||||
for regex in rating_regexs:
|
for regex in rating_regexs:
|
||||||
match = re.search(regex, content)
|
match = re.search(regex, content)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue