This commit is contained in:
Xi Yan 2025-02-13 09:48:52 -08:00
parent ceff63130d
commit 9ce00ede9b
4 changed files with 18 additions and 58 deletions

View file

@ -2665,15 +2665,6 @@
"EvalTask": { "EvalTask": {
"type": "object", "type": "object",
"properties": { "properties": {
"identifier": {
"type": "string"
},
"provider_resource_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"type": { "type": {
"type": "string", "type": "string",
"const": "eval_task", "const": "eval_task",
@ -2682,53 +2673,23 @@
"dataset_id": { "dataset_id": {
"type": "string" "type": "string"
}, },
"scoring_functions": { "config": {
"type": "array", "$ref": "#/components/schemas/AgentConfig"
"items": {
"type": "string"
}
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"identifier",
"provider_resource_id",
"provider_id",
"type", "type",
"dataset_id", "config"
"scoring_functions",
"metadata"
] ]
}, },
"ListEvalTasksResponse": { "ListEvalTasksResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"data": { "sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
"input_shields": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/EvalTask" "$ref": "#/components/schemas/EvalTask"
@ -2768,7 +2729,7 @@
"input_shields": { "input_shields": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "$ref": "#/components/schemas/ToolDef"
} }
}, },
"output_shields": { "output_shields": {

View file

@ -1616,12 +1616,6 @@ components:
EvalTask: EvalTask:
type: object type: object
properties: properties:
identifier:
type: string
provider_resource_id:
type: string
provider_id:
type: string
type: type:
type: string type: string
const: eval_task const: eval_task
@ -1644,9 +1638,6 @@ components:
- type: object - type: object
additionalProperties: false additionalProperties: false
required: required:
- identifier
- provider_resource_id
- provider_id
- type - type
- dataset_id - dataset_id
- scoring_functions - scoring_functions
@ -1654,7 +1645,9 @@ components:
ListEvalTasksResponse: ListEvalTasksResponse:
type: object type: object
properties: properties:
data: sampling_params:
$ref: '#/components/schemas/SamplingParams'
input_shields:
type: array type: array
items: items:
$ref: '#/components/schemas/EvalTask' $ref: '#/components/schemas/EvalTask'

View file

@ -1214,7 +1214,7 @@
" \"sampling_params\": {\n", " \"sampling_params\": {\n",
" \"strategy\": {\n", " \"strategy\": {\n",
" \"type\": \"greedy\",\n", " \"type\": \"greedy\",\n",
" },\n", " },b\n",
" \"max_tokens\": 4096,\n", " \"max_tokens\": 4096,\n",
" \"repeat_penalty\": 1.0,\n", " \"repeat_penalty\": 1.0,\n",
" },\n", " },\n",

View file

@ -472,16 +472,20 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse: async def DEPRECATED_list_eval_tasks(self) -> ListBenchmarksResponse:
logger.warning("DEPRECATED: Use /eval/benchmarks instead") logger.warning("DEPRECATED: Use /eval/benchmarks instead")
return await self.list_benchmarks() return await self.list_benchmarks()
return await self.list_benchmarks()
async def DEPRECATED_get_eval_task( async def DEPRECATED_get_eval_task(
self, self,
task_id: str,
eval_task_id: str, eval_task_id: str,
) -> Optional[Benchmark]: ) -> Optional[Benchmark]:
logger.warning("DEPRECATED: Use /eval/benchmarks instead") logger.warning("DEPRECATED: Use /eval/benchmarks instead")
return await self.get_benchmark(task_id)
return await self.get_benchmark(eval_task_id) return await self.get_benchmark(eval_task_id)
async def DEPRECATED_register_eval_task( async def DEPRECATED_register_eval_task(
self, self,
task_id: str,
eval_task_id: str, eval_task_id: str,
dataset_id: str, dataset_id: str,
scoring_functions: List[str], scoring_functions: List[str],
@ -490,6 +494,8 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks):
metadata: Optional[Dict[str, Any]] = None, metadata: Optional[Dict[str, Any]] = None,
) -> None: ) -> None:
logger.warning("DEPRECATED: Use /eval/benchmarks instead") logger.warning("DEPRECATED: Use /eval/benchmarks instead")
return await self.register_benchmark(
benchmark_id=task_id,
return await self.register_benchmark( return await self.register_benchmark(
benchmark_id=eval_task_id, benchmark_id=eval_task_id,
dataset_id=dataset_id, dataset_id=dataset_id,