diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index b93f6a380..2c5827d37 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -242,18 +242,18 @@
"content": {
"application/json": {
"schema": {
-                            "$ref": "#/components/schemas/ListBenchmarksResponse"
+                            "$ref": "#/components/schemas/ListEvalTasksResponse"
}
}
}
}
},
"tags": [
-                    "Benchmarks"
+                    "EvalTasks"
],
"description": "",
"parameters": [],
"deprecated": true
},
"post": {
"responses": {
@@ -2645,7 +2645,85 @@
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
"components": {
"schemas": {
+ "EvalTask": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "string"
+ },
+ "provider_resource_id": {
+ "type": "string"
+ },
+ "provider_id": {
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "const": "eval_task",
+ "default": "eval_task"
+ },
+ "dataset_id": {
+ "type": "string"
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "identifier",
+ "provider_resource_id",
+ "provider_id",
+ "type",
+ "dataset_id",
+ "scoring_functions",
+ "metadata"
+ ]
+ },
+ "ListEvalTasksResponse": {
+ "type": "object",
+ "properties": {
+ "data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/EvalTask"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "data"
+ ]
+ },
            "AgentCandidate": {
"type": "object",
"properties": {
"type": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index b30025020..c743ce47a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -133,6 +133,6 @@
content:
application/json:
schema:
-                $ref: '#/components/schemas/ListBenchmarksResponse'
+                $ref: '#/components/schemas/ListEvalTasksResponse'
       tags:
-        - Benchmarks
+        - EvalTasks
@@ -1598,7 +1598,55 @@
https://json-schema.org/draft/2020-12/schema
components:
schemas:
+ EvalTask:
+ type: object
+ properties:
+ identifier:
+ type: string
+ provider_resource_id:
+ type: string
+ provider_id:
+ type: string
+ type:
+ type: string
+ const: eval_task
+ default: eval_task
+ dataset_id:
+ type: string
+ scoring_functions:
+ type: array
+ items:
+ type: string
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - identifier
+ - provider_resource_id
+ - provider_id
+ - type
+ - dataset_id
+ - scoring_functions
+ - metadata
+ ListEvalTasksResponse:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/EvalTask'
+ additionalProperties: false
+ required:
+ - data
    AgentCandidate:
type: object
properties:
type:
diff --git a/llama_stack/apis/eval_tasks/eval_tasks.py b/llama_stack/apis/eval_tasks/eval_tasks.py
new file mode 100644
index 000000000..9a26fd0c0
--- /dev/null
+++ b/llama_stack/apis/eval_tasks/eval_tasks.py
@@ -0,0 +1,80 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
+
+from llama_models.schema_utils import json_schema_type, webmethod
+from pydantic import BaseModel, Field
+
+from llama_stack.apis.resource import Resource, ResourceType
+
+
+class CommonEvalTaskFields(BaseModel):
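+    """Fields shared by the EvalTask resource and its registration input."""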
+ dataset_id: str
+ scoring_functions: List[str]
+ metadata: Dict[str, Any] = Field(
+ default_factory=dict,
+ description="Metadata for this evaluation task",
+ )
+
+
+@json_schema_type
+class EvalTask(CommonEvalTaskFields, Resource):
+ type: Literal[ResourceType.eval_task.value] = ResourceType.eval_task.value
+
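+    # Convenience accessors: the registry stores this resource under
+    # `identifier`; the eval APIs refer to the same value as `task_id`.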
+ @property
+ def task_id(self) -> str:
+ return self.identifier
+
+ @property
+ def provider_eval_task_id(self) -> str:
+ return self.provider_resource_id
+
+
+class EvalTaskInput(CommonEvalTaskFields, BaseModel):
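+    # Declarative form for pre-registering eval tasks; the provider fields
+    # may be omitted and are resolved when the task is registered.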
+ task_id: str
+ provider_id: Optional[str] = None
+ provider_eval_task_id: Optional[str] = None
+
+
+class ListEvalTasksResponse(BaseModel):
+ data: List[EvalTask]
+
+
+@runtime_checkable
+class EvalTasks(Protocol):
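+    # The OpenAPI generator treats the DEPRECATED_ prefix as a deprecation
+    # marker, so /eval-tasks below is published with `deprecated: true`
+    # while /eval/tasks is the replacement route.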
+ @webmethod(route="/eval-tasks", method="GET")
+    async def DEPRECATED_list_eval_tasks(
+ self,
+ ) -> ListEvalTasksResponse: ...
+
+ @webmethod(route="/eval/tasks", method="GET")
+ async def list_eval_tasks(self) -> ListEvalTasksResponse: ...
+
+ @webmethod(route="/eval/tasks/{task_id}", method="GET")
+ async def get_eval_task(
+ self,
+ task_id: str,
+ ) -> Optional[EvalTask]: ...
+
+ @webmethod(route="/eval/tasks", method="POST")
+ async def register_eval_task(
+ self,
+ task_id: str,
+ dataset_id: str,
+ scoring_functions: List[str],
+ provider_eval_task_id: Optional[str] = None,
+ provider_id: Optional[str] = None,
+ metadata: Optional[Dict[str, Any]] = None,
+ ) -> None: ...