[Evals API][6/n] meta-reference llm as judge, registration for ScoringFnDefs (#330)

* wip scoring refactor

* llm as judge, move folders

* test full generation + eval

* extract score regex to llm context

* remove prints, cleanup braintrust in this branch

* change json -> class

* remove initialize

* address nits

* check identifier prefix

* update MANIFEST
Authored by Xi Yan on 2024-10-28 14:08:42 -07:00, committed by GitHub
parent 04a4784287
commit 7b8748c53e
20 changed files with 360 additions and 50 deletions
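The commit message above gives the change only in headline form. As a rough illustration of the flow it describes (a scoring function definition that carries its judge prompt and score-extraction regex in an LLM-as-judge context, registered under a provider-prefixed identifier), here is a minimal, self-contained sketch. All names in it (ScoringFnDef, LLMAsJudgeContext, register_scoring_fn, the registry dict) are hypothetical stand-ins modeled on the commit title and bullets, not the actual llama-stack API at this commit.

# Hypothetical sketch only: these classes and helpers are stand-ins modeled on
# the commit title/bullets ("llm as judge", "extract score regex to llm context",
# "check identifier prefix"), not the actual llama-stack implementation.
import re
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class LLMAsJudgeContext:
    # Prompt the judge model receives, plus the regexes used to pull a numeric
    # score out of its free-form answer.
    judge_model: str
    prompt_template: str
    judge_score_regex: list = field(default_factory=list)


@dataclass
class ScoringFnDef:
    identifier: str
    description: str
    context: Optional[LLMAsJudgeContext] = None


SCORING_FN_REGISTRY = {}


def register_scoring_fn(provider_id: str, fn_def: ScoringFnDef) -> None:
    # "check identifier prefix": only accept definitions namespaced under the
    # registering provider, e.g. "meta-reference::...".
    if not fn_def.identifier.startswith(f"{provider_id}::"):
        raise ValueError(
            f"identifier {fn_def.identifier!r} must start with {provider_id!r}::"
        )
    SCORING_FN_REGISTRY[fn_def.identifier] = fn_def


def extract_score(judge_output: str, fn_def: ScoringFnDef) -> Optional[float]:
    # Apply the regexes carried in the LLM-as-judge context to the raw judge
    # completion and return the first numeric capture, if any.
    patterns = fn_def.context.judge_score_regex if fn_def.context else []
    for pattern in patterns:
        match = re.search(pattern, judge_output)
        if match:
            return float(match.group(1))
    return None


if __name__ == "__main__":
    fn_def = ScoringFnDef(
        identifier="meta-reference::llm_as_judge_correctness",
        description="Grade answer correctness with an LLM judge.",
        context=LLMAsJudgeContext(
            judge_model="Llama3.1-8B-Instruct",
            prompt_template="Rate the answer from 0 to 5. Reply as 'Score: N'.",
            judge_score_regex=[r"Score:\s*(\d+)"],
        ),
    )
    register_scoring_fn("meta-reference", fn_def)
    print(extract_score("Score: 4. Mostly correct.", fn_def))  # -> 4.0

The identifier prefix check mirrors the "check identifier prefix" bullet: a provider only accepts scoring function definitions namespaced under its own id.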

@@ -18,6 +18,7 @@ from .config import MetaReferenceEvalConfig
class ColumnName(Enum):
    input_query = "input_query"
    expected_answer = "expected_answer"
    chat_completion_input = "chat_completion_input"
    completion_input = "completion_input"
@@ -53,10 +54,12 @@ class MetaReferenceEvalImpl(Eval):
        expected_schemas = [
            {
                ColumnName.input_query.value: StringType(),
                ColumnName.expected_answer.value: StringType(),
                ColumnName.chat_completion_input.value: ChatCompletionInputType(),
            },
            {
                ColumnName.input_query.value: StringType(),
                ColumnName.expected_answer.value: StringType(),
                ColumnName.completion_input.value: CompletionInputType(),
            },