diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 4344dc26b..608237cfd 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -1999,7 +1999,7 @@ } } }, - "/v1/evaluation/grade_inline": { + "/v1/evaluation/grade_sync": { "post": { "responses": { "200": { @@ -2034,7 +2034,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/GradeInlineRequest" + "$ref": "#/components/schemas/GradeSyncRequest" } } }, @@ -3451,49 +3451,6 @@ } } }, - "/v1/evaluation/run_inline": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Evaluation" - ], - "description": "Run an evaluation job inline.", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/RunInlineRequest" - } - } - }, - "required": true - } - } - }, "/v1/safety/run-shield": { "post": { "responses": { @@ -3537,6 +3494,49 @@ } } }, + "/v1/evaluation/run_sync": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluationResponse" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Evaluation" + ], + "description": "Run an evaluation job inline.", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunSyncRequest" + } + } + }, + "required": true + } + } + }, "/v1/telemetry/spans/export": { "post": { "responses": { @@ -7682,7 +7682,7 @@ "title": "ModelCandidate", "description": "A model candidate for evaluation." }, - "GradeInlineRequest": { + "GradeSyncRequest": { "type": "object", "properties": { "task": { @@ -7694,7 +7694,7 @@ "required": [ "task" ], - "title": "GradeInlineRequest" + "title": "GradeSyncRequest" }, "EvaluationResponse": { "type": "object", @@ -9726,25 +9726,6 @@ ], "title": "RunRequest" }, - "RunInlineRequest": { - "type": "object", - "properties": { - "task": { - "$ref": "#/components/schemas/EvaluationTask", - "description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids" - }, - "candidate": { - "$ref": "#/components/schemas/EvaluationCandidate", - "description": "The candidate to evaluate." - } - }, - "additionalProperties": false, - "required": [ - "task", - "candidate" - ], - "title": "RunInlineRequest" - }, "RunShieldRequest": { "type": "object", "properties": { @@ -9801,6 +9782,25 @@ "additionalProperties": false, "title": "RunShieldResponse" }, + "RunSyncRequest": { + "type": "object", + "properties": { + "task": { + "$ref": "#/components/schemas/EvaluationTask", + "description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids" + }, + "candidate": { + "$ref": "#/components/schemas/EvaluationCandidate", + "description": "The candidate to evaluate." + } + }, + "additionalProperties": false, + "required": [ + "task", + "candidate" + ], + "title": "RunSyncRequest" + }, "SaveSpansToDatasetRequest": { "type": "object", "properties": { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index ef42d032b..57bf76478 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -1343,7 +1343,7 @@ paths: schema: $ref: '#/components/schemas/GradeRequest' required: true - /v1/evaluation/grade_inline: + /v1/evaluation/grade_sync: post: responses: '200': @@ -1373,7 +1373,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/GradeInlineRequest' + $ref: '#/components/schemas/GradeSyncRequest' required: true /v1/health: get: @@ -2350,35 +2350,6 @@ paths: schema: $ref: '#/components/schemas/RunRequest' required: true - /v1/evaluation/run_inline: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluationResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Evaluation - description: Run an evaluation job inline. - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RunInlineRequest' - required: true /v1/safety/run-shield: post: responses: @@ -2408,6 +2379,35 @@ paths: schema: $ref: '#/components/schemas/RunShieldRequest' required: true + /v1/evaluation/run_sync: + post: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/EvaluationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Evaluation + description: Run an evaluation job inline. + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RunSyncRequest' + required: true /v1/telemetry/spans/export: post: responses: @@ -5326,7 +5326,7 @@ components: - sampling_params title: ModelCandidate description: A model candidate for evaluation. - GradeInlineRequest: + GradeSyncRequest: type: object properties: task: @@ -5339,7 +5339,7 @@ components: additionalProperties: false required: - task - title: GradeInlineRequest + title: GradeSyncRequest EvaluationResponse: type: object properties: @@ -6643,24 +6643,6 @@ components: - task - candidate title: RunRequest - RunInlineRequest: - type: object - properties: - task: - $ref: '#/components/schemas/EvaluationTask' - description: >- - The task to evaluate. One of: - BenchmarkTask: Run evaluation task against - a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id - and a list of grader_ids - DataSourceGraderTask: Run evaluation task against - a data source (e.g. rows, uri, etc.) and a list of grader_ids - candidate: - $ref: '#/components/schemas/EvaluationCandidate' - description: The candidate to evaluate. - additionalProperties: false - required: - - task - - candidate - title: RunInlineRequest RunShieldRequest: type: object properties: @@ -6693,6 +6675,24 @@ components: $ref: '#/components/schemas/SafetyViolation' additionalProperties: false title: RunShieldResponse + RunSyncRequest: + type: object + properties: + task: + $ref: '#/components/schemas/EvaluationTask' + description: >- + The task to evaluate. One of: - BenchmarkTask: Run evaluation task against + a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id + and a list of grader_ids - DataSourceGraderTask: Run evaluation task against + a data source (e.g. rows, uri, etc.) and a list of grader_ids + candidate: + $ref: '#/components/schemas/EvaluationCandidate' + description: The candidate to evaluate. + additionalProperties: false + required: + - task + - candidate + title: RunSyncRequest SaveSpansToDatasetRequest: type: object properties: diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py index 444495b6e..faa620872 100644 --- a/llama_stack/apis/evaluation/evaluation.py +++ b/llama_stack/apis/evaluation/evaluation.py @@ -129,8 +129,8 @@ class Evaluation(Protocol): """ ... - @webmethod(route="/evaluation/run_inline", method="POST") - async def run_inline( + @webmethod(route="/evaluation/run_sync", method="POST") + async def run_sync( self, task: EvaluationTask, candidate: EvaluationCandidate, @@ -160,8 +160,8 @@ class Evaluation(Protocol): """ ... - @webmethod(route="/evaluation/grade_inline", method="POST") - async def grade_inline(self, task: EvaluationTask) -> EvaluationResponse: + @webmethod(route="/evaluation/grade_sync", method="POST") + async def grade_sync(self, task: EvaluationTask) -> EvaluationResponse: """ Run an grading job with generated results inline.