diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index b93f6a380..2c5827d37 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -40,6 +40,7 @@
         }
     ],
     "paths": {
+<<<<<<< HEAD
        "/v1/eval/tasks/{task_id}/evaluations": {
            "post": {
                "responses": {
@@ -234,6 +235,8 @@
                "deprecated": true
            }
        },
+=======
+>>>>>>> 974941be (deprecation in OpenAPI spec)
        "/v1/eval-tasks": {
            "get": {
                "responses": {
@@ -242,18 +245,27 @@
                        "content": {
                            "application/json": {
                                "schema": {
+<<<<<<< HEAD
                                    "$ref": "#/components/schemas/ListBenchmarksResponse"
+=======
+                                    "$ref": "#/components/schemas/ListEvalTasksResponse"
+>>>>>>> 974941be (deprecation in OpenAPI spec)
                                }
                            }
                        }
                    }
                },
                "tags": [
+<<<<<<< HEAD
                    "Benchmarks"
+=======
+                    "EvalTasks"
+>>>>>>> 974941be (deprecation in OpenAPI spec)
                ],
                "description": "",
                "parameters": [],
                "deprecated": true
+<<<<<<< HEAD
            },
            "post": {
                "responses": {
@@ -318,6 +330,8 @@
                    "required": true
                },
                "deprecated": true
+=======
+>>>>>>> 974941be (deprecation in OpenAPI spec)
            }
        },
        "/v1/datasetio/rows": {
@@ -2645,7 +2659,89 @@
    "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
    "components": {
        "schemas": {
+<<<<<<< HEAD
            "AgentCandidate": {
+=======
+            "EvalTask": {
+                "type": "object",
+                "properties": {
+                    "identifier": {
+                        "type": "string"
+                    },
+                    "provider_resource_id": {
+                        "type": "string"
+                    },
+                    "provider_id": {
+                        "type": "string"
+                    },
+                    "type": {
+                        "type": "string",
+                        "const": "eval_task",
+                        "default": "eval_task"
+                    },
+                    "dataset_id": {
+                        "type": "string"
+                    },
+                    "scoring_functions": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "identifier",
+                    "provider_resource_id",
+                    "provider_id",
+                    "type",
+                    "dataset_id",
+                    "scoring_functions",
+                    "metadata"
+                ]
+            },
+            "ListEvalTasksResponse": {
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/EvalTask"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "data"
+                ]
+            },
+            "AppendRowsRequest": {
+>>>>>>> 974941be (deprecation in OpenAPI spec)
                "type": "object",
                "properties": {
                    "type": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index b30025020..c743ce47a 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -10,6 +10,7 @@ info:
 servers:
   - url: http://any-hosted-llama-stack.com
 paths:
+<<<<<<< HEAD
   /v1/eval/tasks/{task_id}/evaluations:
     post:
       responses:
@@ -125,6 +126,8 @@ paths:
          schema:
            type: string
      deprecated: true
+=======
+>>>>>>> 974941be (deprecation in OpenAPI spec)
  /v1/eval-tasks:
    get:
      responses:
@@ -133,6 +136,7 @@
          content:
            application/json:
              schema:
+<<<<<<< HEAD
                $ref: '#/components/schemas/ListBenchmarksResponse'
      tags:
        - Benchmarks
@@ -179,6 +183,14 @@
              $ref: '#/components/schemas/DeprecatedRunEvalRequest'
        required: true
      deprecated: true
+=======
+                $ref: '#/components/schemas/ListEvalTasksResponse'
+      tags:
+        - EvalTasks
+      description: ''
+      parameters: []
+      deprecated: true
+>>>>>>> 974941be (deprecation in OpenAPI spec)
  /v1/datasetio/rows:
    get:
      responses:
@@ -1598,7 +1610,59 @@ jsonSchemaDialect: >-
  https://json-schema.org/draft/2020-12/schema
components:
  schemas:
+<<<<<<< HEAD
    AgentCandidate:
+=======
+    EvalTask:
+      type: object
+      properties:
+        identifier:
+          type: string
+        provider_resource_id:
+          type: string
+        provider_id:
+          type: string
+        type:
+          type: string
+          const: eval_task
+          default: eval_task
+        dataset_id:
+          type: string
+        scoring_functions:
+          type: array
+          items:
+            type: string
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+      additionalProperties: false
+      required:
+        - identifier
+        - provider_resource_id
+        - provider_id
+        - type
+        - dataset_id
+        - scoring_functions
+        - metadata
+    ListEvalTasksResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/EvalTask'
+      additionalProperties: false
+      required:
+        - data
+    AppendRowsRequest:
+>>>>>>> 974941be (deprecation in OpenAPI spec)
      type: object
      properties:
        type:
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index e37c45690..0f3b99784 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -647,6 +647,7 @@ class Generator:
        description = "\n".join(
            filter(None, [doc_string.short_description, doc_string.long_description])
        )
+
        return Operation(
            tags=[op.defining_class.__name__],
            summary=None,
diff --git a/llama_stack/apis/eval_tasks/eval_tasks.py b/llama_stack/apis/eval_tasks/eval_tasks.py
new file mode 100644
index 000000000..9a26fd0c0
--- /dev/null
+++ b/llama_stack/apis/eval_tasks/eval_tasks.py
@@ -0,0 +1,71 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
+
+from llama_models.schema_utils import json_schema_type, webmethod
+from pydantic import BaseModel, Field
+
+from llama_stack.apis.resource import Resource, ResourceType
+
+
+class CommonEvalTaskFields(BaseModel):
+    dataset_id: str
+    scoring_functions: List[str]
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Metadata for this evaluation task",
+    )
+
+
+@json_schema_type
+class EvalTask(CommonEvalTaskFields, Resource):
+    type: Literal[ResourceType.eval_task.value] = ResourceType.eval_task.value
+
+    @property
+    def task_id(self) -> str:
+        return self.identifier
+
+    @property
+    def provider_eval_task_id(self) -> str:
+        return self.provider_resource_id
+
+
+class EvalTaskInput(CommonEvalTaskFields, BaseModel):
+    task_id: str
+    provider_id: Optional[str] = None
+    provider_eval_task_id: Optional[str] = None
+
+
+class ListEvalTasksResponse(BaseModel):
+    data: List[EvalTask]
+
+
+@runtime_checkable
+class EvalTasks(Protocol):
+    @webmethod(route="/eval-tasks", method="GET")
+    async def DEPRECATED_list_eval_tasks(
+        self,
+    ) -> ListEvalTasksResponse: ...
+
+    @webmethod(route="/eval/tasks", method="GET")
+    async def list_eval_tasks(self) -> ListEvalTasksResponse: ...
+
+    @webmethod(route="/eval/tasks/{task_id}", method="GET")
+    async def get_eval_task(
+        self,
+        task_id: str,
+    ) -> Optional[EvalTask]: ...
+
+    @webmethod(route="/eval/tasks", method="POST")
+    async def register_eval_task(
+        self,
+        task_id: str,
+        dataset_id: str,
+        scoring_functions: List[str],
+        provider_eval_task_id: Optional[str] = None,
+        provider_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None: ...
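
For reviewers, here is a minimal sketch of how the new `EvalTasks` protocol from `llama_stack/apis/eval_tasks/eval_tasks.py` could be satisfied. The `InMemoryEvalTasks` class and its backing dict are hypothetical and not part of this diff; only `EvalTask`, `EvalTasks`, and `ListEvalTasksResponse` come from the new module, and the example assumes that module imports cleanly and that `EvalTask` can be constructed with the `Resource` fields shown in the schema above.

```python
# Hypothetical in-memory registry satisfying the EvalTasks protocol.
# Not part of this diff; illustrative only.
from typing import Any, Dict, List, Optional

from llama_stack.apis.eval_tasks.eval_tasks import (
    EvalTask,
    EvalTasks,
    ListEvalTasksResponse,
)


class InMemoryEvalTasks:
    """Sketch of a registry backing the four routes in the protocol."""

    def __init__(self) -> None:
        self._tasks: Dict[str, EvalTask] = {}

    async def DEPRECATED_list_eval_tasks(self) -> ListEvalTasksResponse:
        # Old route (/eval-tasks): same payload, delegates to the new route.
        return await self.list_eval_tasks()

    async def list_eval_tasks(self) -> ListEvalTasksResponse:
        return ListEvalTasksResponse(data=list(self._tasks.values()))

    async def get_eval_task(self, task_id: str) -> Optional[EvalTask]:
        return self._tasks.get(task_id)

    async def register_eval_task(
        self,
        task_id: str,
        dataset_id: str,
        scoring_functions: List[str],
        provider_eval_task_id: Optional[str] = None,
        provider_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._tasks[task_id] = EvalTask(
            identifier=task_id,
            provider_resource_id=provider_eval_task_id or task_id,
            provider_id=provider_id or "inline",  # assumed placeholder provider
            dataset_id=dataset_id,
            scoring_functions=scoring_functions,
            metadata=metadata or {},
        )
```

Because `EvalTasks` is decorated with `@runtime_checkable`, `isinstance(InMemoryEvalTasks(), EvalTasks)` verifies at runtime that all four methods are present (by name only; Protocol `isinstance` checks do not validate signatures).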
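
On the wire, the change is meant to be additive: the old `/v1/eval-tasks` listing keeps returning a `ListEvalTasksResponse`-shaped body but is flagged `deprecated: true` in the generated specs, while the replacement lives under `/v1/eval/tasks` (the `/eval/tasks` route from the new Python module, with the stack's `/v1` prefix). A hedged parity check follows; the base URL is an assumption for a local deployment, not something this diff specifies:

```python
# Hypothetical smoke test: the deprecated and replacement listing routes
# should return the same {"data": [...]} payload.
import httpx

BASE_URL = "http://localhost:5000"  # assumed local Llama Stack endpoint

with httpx.Client(base_url=BASE_URL) as client:
    deprecated = client.get("/v1/eval-tasks")   # marked deprecated: true in the spec
    replacement = client.get("/v1/eval/tasks")  # new route from eval_tasks.py
    deprecated.raise_for_status()
    replacement.raise_for_status()
    # Both bodies follow the ListEvalTasksResponse schema.
    assert deprecated.json()["data"] == replacement.json()["data"]
```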