forked from phoenix-oss/llama-stack-mirror
[Evals API][6/n] meta-reference llm as judge, registration for ScoringFnDefs (#330)
* wip scoring refactor * llm as judge, move folders * test full generation + eval * extract score regex to llm context * remove prints, cleanup braintrust in this branch * change json -> class * remove initialize * address nits * check identifier prefix * update MANIFEST
This commit is contained in:
parent
04a4784287
commit
7b8748c53e
20 changed files with 360 additions and 50 deletions
|
@ -18,6 +18,7 @@ from .config import MetaReferenceEvalConfig
|
|||
|
||||
|
||||
class ColumnName(Enum):
|
||||
input_query = "input_query"
|
||||
expected_answer = "expected_answer"
|
||||
chat_completion_input = "chat_completion_input"
|
||||
completion_input = "completion_input"
|
||||
|
@ -53,10 +54,12 @@ class MetaReferenceEvalImpl(Eval):
|
|||
|
||||
expected_schemas = [
|
||||
{
|
||||
ColumnName.input_query.value: StringType(),
|
||||
ColumnName.expected_answer.value: StringType(),
|
||||
ColumnName.chat_completion_input.value: ChatCompletionInputType(),
|
||||
},
|
||||
{
|
||||
ColumnName.input_query.value: StringType(),
|
||||
ColumnName.expected_answer.value: StringType(),
|
||||
ColumnName.completion_input.value: CompletionInputType(),
|
||||
},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue