mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-12 04:50:39 +00:00
more fix
This commit is contained in:
parent
000569b003
commit
b464575a1e
1 changed file with 2 additions and 8 deletions
|
@ -75,13 +75,14 @@ system_message = {
|
||||||
"content": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),
|
"content": SYSTEM_PROMPT_TEMPLATE.format(subject=subset),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# register the evaluation benchmark task with the dataset and scoring function
|
||||||
client.benchmarks.register(
|
client.benchmarks.register(
|
||||||
benchmark_id="meta-reference::mmmu",
|
benchmark_id="meta-reference::mmmu",
|
||||||
dataset_id=f"mmmu-{subset}-{split}",
|
dataset_id=f"mmmu-{subset}-{split}",
|
||||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
)
|
)
|
||||||
|
|
||||||
response = client.eval.evaluate_rows_alpha(
|
response = client.eval.evaluate_rows(
|
||||||
benchmark_id="meta-reference::mmmu",
|
benchmark_id="meta-reference::mmmu",
|
||||||
input_rows=eval_rows,
|
input_rows=eval_rows,
|
||||||
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
|
||||||
|
@ -134,13 +135,6 @@ eval_rows = client.datasetio.get_rows_paginated(
|
||||||
```
|
```
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# register 405B as LLM Judge model
|
|
||||||
client.models.register(
|
|
||||||
model_id="meta-llama/Llama-3.1-405B-Instruct",
|
|
||||||
provider_model_id="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
|
||||||
provider_id="together",
|
|
||||||
)
|
|
||||||
|
|
||||||
client.benchmarks.register(
|
client.benchmarks.register(
|
||||||
benchmark_id="meta-reference::simpleqa",
|
benchmark_id="meta-reference::simpleqa",
|
||||||
dataset_id=simpleqa_dataset_id,
|
dataset_id=simpleqa_dataset_id,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue