mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-11 19:56:03 +00:00
datasets api
This commit is contained in:
parent
18fe966e96
commit
f046899a1c
15 changed files with 281 additions and 80 deletions
|
|
@@ -28,11 +28,13 @@ class Api(Enum):
|
|||
models = "models"
|
||||
shields = "shields"
|
||||
memory_banks = "memory_banks"
|
||||
evals = "evals"
|
||||
|
||||
# built-in API
|
||||
inspect = "inspect"
|
||||
|
||||
evals = "evals"
|
||||
datasets = "datasets"
|
||||
|
||||
|
||||
class ModelsProtocolPrivate(Protocol):
|
||||
async def list_models(self) -> List[ModelDef]: ...
|
||||
|
|
|
|||
|
|
@@ -9,11 +9,9 @@ from termcolor import cprint
|
|||
|
||||
from llama_stack.apis.inference import * # noqa: F403
|
||||
from llama_stack.apis.evals import * # noqa: F403
|
||||
from llama_stack.apis.dataset import * # noqa: F403
|
||||
from llama_stack.apis.datasets import * # noqa: F403
|
||||
|
||||
from .config import MetaReferenceEvalsImplConfig
|
||||
|
||||
# from llama_stack.distribution.registry.tasks.task_registry import TaskRegistry
|
||||
from .tasks.run_eval_task import RunEvalTask
|
||||
|
||||
|
||||
|
|
@@ -47,7 +45,7 @@ class MetaReferenceEvalsImpl(Evals):
|
|||
eval_task_config = EvaluateTaskConfig(
|
||||
dataset_config=EvaluateDatasetConfig(
|
||||
dataset_name=dataset,
|
||||
row_limit=2,
|
||||
row_limit=3,
|
||||
),
|
||||
generation_config=EvaluateModelGenerationConfig(
|
||||
model=model,
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@
|
|||
import random
|
||||
|
||||
from llama_stack.apis.evals.evals import BaseScorer, EvalResult, SingleEvalResult
|
||||
from llama_stack.apis.dataset.dataset import * # noqa: F401 F403
|
||||
from llama_stack.apis.datasets.datasets import * # noqa: F401 F403
|
||||
|
||||
|
||||
class AggregateScorer(BaseScorer[ScorerInputSample]):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue