mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-16 14:57:20 +00:00
simpleqa eval
This commit is contained in:
parent
f94681baac
commit
43fb522a13
8 changed files with 191 additions and 24 deletions
|
@ -43,6 +43,11 @@ class LLMAsJudgeContext(BaseModel):
|
|||
description="Regex to extract the score from the judge response",
|
||||
default=None,
|
||||
)
|
||||
# TODO: think about whether to put this as a scoring function context or in a separate scorer
|
||||
# and how the LLM as judge defines the response
|
||||
judge_grade_metrics: Optional[Dict[str, str]] = Field(
|
||||
description="Mapping of extracted judge response to score", default=None
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue