forked from phoenix-oss/llama-stack-mirror
comments
This commit is contained in:
parent
a8b0467ec3
commit
a69759613a
7 changed files with 2486 additions and 389 deletions
|
@ -62,7 +62,7 @@ class Benchmarks(Protocol):
|
|||
"""
|
||||
Register a new benchmark.
|
||||
|
||||
:param dataset_id: The ID of the dataset to used to run the benchmark.
|
||||
:param dataset_id: The ID of the dataset to be used to run the benchmark.
|
||||
:param grader_ids: List of grader ids to use for this benchmark.
|
||||
:param benchmark_id: (Optional) The ID of the benchmark to register. If not provided, an ID will be generated.
|
||||
:param metadata: (Optional) Metadata for this benchmark for additional descriptions.
|
||||
|
@ -87,3 +87,10 @@ class Benchmarks(Protocol):
|
|||
:param benchmark_id: The ID of the benchmark to get.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/benchmarks/{benchmark_id}", method="DELETE")
|
||||
async def unregister_benchmark(self, benchmark_id: str) -> None:
|
||||
"""
|
||||
Unregister a benchmark by ID.
|
||||
"""
|
||||
...
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
# the root directory of this source tree.
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
@ -38,12 +37,12 @@ class CommonJobFields(BaseModel):
|
|||
:param id: The ID of the job.
|
||||
:param status: The status of the job.
|
||||
:param created_at: The time the job was created.
|
||||
:param ended_at: The time the job ended.
|
||||
:param completed_at: The time the job completed.
|
||||
:param error: If status of the job is failed, this will contain the error message.
|
||||
"""
|
||||
|
||||
id: str
|
||||
status: JobStatus
|
||||
created_at: datetime
|
||||
ended_at: Optional[datetime] = None
|
||||
error: Optional[str] = None
|
||||
completed_at: datetime | None = None
|
||||
error: str | None = None
|
||||
|
|
|
@ -48,28 +48,28 @@ EvaluationCandidate = register_schema(
|
|||
|
||||
|
||||
@json_schema_type
|
||||
class BenchmarkTask(BaseModel):
|
||||
type: Literal["benchmark_id"] = "benchmark_id"
|
||||
class BenchmarkEvaluationTask(BaseModel):
|
||||
type: Literal["benchmark"] = "benchmark"
|
||||
benchmark_id: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DatasetGraderTask(BaseModel):
|
||||
type: Literal["dataset_grader"] = "dataset_grader"
|
||||
class DatasetEvaluationTask(BaseModel):
|
||||
type: Literal["dataset"] = "dataset"
|
||||
dataset_id: str
|
||||
grader_ids: List[str]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class DataSourceGraderTask(BaseModel):
|
||||
type: Literal["data_source_grader"] = "data_source_grader"
|
||||
class DataEvaluationTask(BaseModel):
|
||||
type: Literal["data"] = "data"
|
||||
data_source: DataSource
|
||||
grader_ids: List[str]
|
||||
|
||||
|
||||
EvaluationTask = register_schema(
|
||||
Annotated[
|
||||
Union[BenchmarkTask, DatasetGraderTask, DataSourceGraderTask],
|
||||
Union[BenchmarkEvaluationTask, DatasetEvaluationTask, DataEvaluationTask],
|
||||
Field(discriminator="type"),
|
||||
],
|
||||
name="EvaluationTask",
|
||||
|
|
|
@ -29,6 +29,13 @@ from .graders import * # noqa: F401 F403
|
|||
class GraderType(Enum):
|
||||
"""
|
||||
A type of grader. Each type is a criterion for evaluating answers.
|
||||
|
||||
:cvar llm: Use an LLM to score the answer.
|
||||
:cvar regex_parser: Use a regex parser to score the answer.
|
||||
:cvar equality: Check if the answer is equal to the reference answer.
|
||||
:cvar subset_of: Check if the answer is a subset of the reference answer.
|
||||
:cvar factuality: Check if the answer is factually correct using LLM as judge.
|
||||
:cvar faithfulness: Check if the answer is faithful to the reference answer using LLM as judge.
|
||||
"""
|
||||
|
||||
llm = "llm"
|
||||
|
@ -221,9 +228,9 @@ class Graders(Protocol):
|
|||
...
|
||||
|
||||
@webmethod(route="/graders/{grader_id:path}", method="DELETE")
|
||||
async def delete_grader(self, grader_id: str) -> None:
|
||||
async def unregister_grader(self, grader_id: str) -> None:
|
||||
"""
|
||||
Delete a grader by ID.
|
||||
Unregister a grader by ID.
|
||||
:param grader_id: The ID of the grader.
|
||||
"""
|
||||
...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue