# What does this PR do?

This PR adds a new open eval benchmark, IfEval, based on the paper https://arxiv.org/abs/2311.07911, to measure a model's instruction-following capability.

## Test Plan

Spin up a Llama Stack server with the open-benchmark template, then run

`llama-stack-client --endpoint xxx eval run-benchmark "meta-reference-ifeval" --model-id "meta-llama/Llama-3.3-70B-Instruct" --output-dir "/home/markchen1015/" --num-examples 20`

on the client side and inspect the aggregate eval results.
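IfEval scores responses against verifiable, programmatic constraints (respond in a given language, meet a word-count floor, and so on), which is why the provider spec below gains scoring dependencies such as `langdetect` and `nltk`. A minimal, hypothetical sketch of one such check follows; it is not the benchmark's actual scoring code, and `follows_instructions` is an illustrative name:

```python
# Hypothetical sketch of an IfEval-style verifiable check (not the provider's
# real scoring code): verify that a response obeys "reply in French" and
# "use at least 50 words". langdetect is one of the new scoring dependencies.
from langdetect import detect


def follows_instructions(response: str, language: str = "fr", min_words: int = 50) -> bool:
    detected = detect(response)          # ISO 639-1 code, e.g. "fr"
    word_count = len(response.split())   # rough whitespace word count
    return detected == language and word_count >= min_words
```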
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from typing import List

from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec


def available_providers() -> List[ProviderSpec]:
    return [
        InlineProviderSpec(
            api=Api.eval,
            provider_type="inline::meta-reference",
            pip_packages=["tree_sitter", "pythainlp", "langdetect", "emoji", "nltk"],
            module="llama_stack.providers.inline.eval.meta_reference",
            config_class="llama_stack.providers.inline.eval.meta_reference.MetaReferenceEvalConfig",
            api_dependencies=[
                Api.datasetio,
                Api.datasets,
                Api.scoring,
                Api.inference,
                Api.agents,
            ],
        ),
    ]
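For a quick sanity check of the registry change, the provider spec can be inspected directly. A minimal sketch, assuming this file lives at `llama_stack/providers/registry/eval.py`:

```python
# Minimal sketch: print the registered eval provider and the extra scoring
# dependencies pulled in for IfEval (tree_sitter, pythainlp, langdetect, ...).
from llama_stack.providers.registry.eval import available_providers

for spec in available_providers():
    print(spec.provider_type, spec.pip_packages)
```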