diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 4344dc26b..608237cfd 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -1999,7 +1999,7 @@
}
}
},
- "/v1/evaluation/grade_inline": {
+ "/v1/evaluation/grade_sync": {
"post": {
"responses": {
"200": {
@@ -2034,7 +2034,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/GradeInlineRequest"
+ "$ref": "#/components/schemas/GradeSyncRequest"
}
}
},
@@ -3451,49 +3451,6 @@
}
}
},
- "/v1/evaluation/run_inline": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationResponse"
- }
- }
- }
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "Evaluation"
- ],
- "description": "Run an evaluation job inline.",
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RunInlineRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/v1/safety/run-shield": {
"post": {
"responses": {
@@ -3537,6 +3494,49 @@
}
}
},
+ "/v1/evaluation/run_sync": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluationResponse"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Evaluation"
+ ],
+ "description": "Run an evaluation job synchronously (inline).",
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RunSyncRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/telemetry/spans/export": {
"post": {
"responses": {
@@ -7682,7 +7682,7 @@
"title": "ModelCandidate",
"description": "A model candidate for evaluation."
},
- "GradeInlineRequest": {
+ "GradeSyncRequest": {
"type": "object",
"properties": {
"task": {
@@ -7694,7 +7694,7 @@
"required": [
"task"
],
- "title": "GradeInlineRequest"
+ "title": "GradeSyncRequest"
},
"EvaluationResponse": {
"type": "object",
@@ -9726,25 +9726,6 @@
],
"title": "RunRequest"
},
- "RunInlineRequest": {
- "type": "object",
- "properties": {
- "task": {
- "$ref": "#/components/schemas/EvaluationTask",
- "description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
- },
- "candidate": {
- "$ref": "#/components/schemas/EvaluationCandidate",
- "description": "The candidate to evaluate."
- }
- },
- "additionalProperties": false,
- "required": [
- "task",
- "candidate"
- ],
- "title": "RunInlineRequest"
- },
"RunShieldRequest": {
"type": "object",
"properties": {
@@ -9801,6 +9782,25 @@
"additionalProperties": false,
"title": "RunShieldResponse"
},
+ "RunSyncRequest": {
+ "type": "object",
+ "properties": {
+ "task": {
+ "$ref": "#/components/schemas/EvaluationTask",
+ "description": "The task to evaluate. One of: - BenchmarkTask: Run evaluation task against a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id and a list of grader_ids - DataSourceGraderTask: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
+ },
+ "candidate": {
+ "$ref": "#/components/schemas/EvaluationCandidate",
+ "description": "The candidate to evaluate."
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "task",
+ "candidate"
+ ],
+ "title": "RunSyncRequest"
+ },
"SaveSpansToDatasetRequest": {
"type": "object",
"properties": {
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index ef42d032b..57bf76478 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1343,7 +1343,7 @@ paths:
schema:
$ref: '#/components/schemas/GradeRequest'
required: true
- /v1/evaluation/grade_inline:
+ /v1/evaluation/grade_sync:
post:
responses:
'200':
@@ -1373,7 +1373,7 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/GradeInlineRequest'
+ $ref: '#/components/schemas/GradeSyncRequest'
required: true
/v1/health:
get:
@@ -2350,35 +2350,6 @@ paths:
schema:
$ref: '#/components/schemas/RunRequest'
required: true
- /v1/evaluation/run_inline:
- post:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - Evaluation
- description: Run an evaluation job inline.
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RunInlineRequest'
- required: true
/v1/safety/run-shield:
post:
responses:
@@ -2408,6 +2379,35 @@ paths:
schema:
$ref: '#/components/schemas/RunShieldRequest'
required: true
+ /v1/evaluation/run_sync:
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/EvaluationResponse'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Evaluation
+ description: Run an evaluation job synchronously (inline).
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RunSyncRequest'
+ required: true
/v1/telemetry/spans/export:
post:
responses:
@@ -5326,7 +5326,7 @@ components:
- sampling_params
title: ModelCandidate
description: A model candidate for evaluation.
- GradeInlineRequest:
+ GradeSyncRequest:
type: object
properties:
task:
@@ -5339,7 +5339,7 @@ components:
additionalProperties: false
required:
- task
- title: GradeInlineRequest
+ title: GradeSyncRequest
EvaluationResponse:
type: object
properties:
@@ -6643,24 +6643,6 @@ components:
- task
- candidate
title: RunRequest
- RunInlineRequest:
- type: object
- properties:
- task:
- $ref: '#/components/schemas/EvaluationTask'
- description: >-
- The task to evaluate. One of: - BenchmarkTask: Run evaluation task against
- a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id
- and a list of grader_ids - DataSourceGraderTask: Run evaluation task against
- a data source (e.g. rows, uri, etc.) and a list of grader_ids
- candidate:
- $ref: '#/components/schemas/EvaluationCandidate'
- description: The candidate to evaluate.
- additionalProperties: false
- required:
- - task
- - candidate
- title: RunInlineRequest
RunShieldRequest:
type: object
properties:
@@ -6693,6 +6675,24 @@ components:
$ref: '#/components/schemas/SafetyViolation'
additionalProperties: false
title: RunShieldResponse
+ RunSyncRequest:
+ type: object
+ properties:
+ task:
+ $ref: '#/components/schemas/EvaluationTask'
+ description: >-
+ The task to evaluate. One of: - BenchmarkTask: Run evaluation task against
+ a benchmark_id - DatasetGraderTask: Run evaluation task against a dataset_id
+ and a list of grader_ids - DataSourceGraderTask: Run evaluation task against
+ a data source (e.g. rows, uri, etc.) and a list of grader_ids
+ candidate:
+ $ref: '#/components/schemas/EvaluationCandidate'
+ description: The candidate to evaluate.
+ additionalProperties: false
+ required:
+ - task
+ - candidate
+ title: RunSyncRequest
SaveSpansToDatasetRequest:
type: object
properties:
diff --git a/llama_stack/apis/evaluation/evaluation.py b/llama_stack/apis/evaluation/evaluation.py
index 444495b6e..faa620872 100644
--- a/llama_stack/apis/evaluation/evaluation.py
+++ b/llama_stack/apis/evaluation/evaluation.py
@@ -129,8 +129,8 @@ class Evaluation(Protocol):
"""
...
- @webmethod(route="/evaluation/run_inline", method="POST")
- async def run_inline(
+ @webmethod(route="/evaluation/run_sync", method="POST")
+ async def run_sync(
self,
task: EvaluationTask,
candidate: EvaluationCandidate,
@@ -160,8 +160,8 @@ class Evaluation(Protocol):
"""
...
- @webmethod(route="/evaluation/grade_inline", method="POST")
- async def grade_inline(self, task: EvaluationTask) -> EvaluationResponse:
+ @webmethod(route="/evaluation/grade_sync", method="POST")
+ async def grade_sync(self, task: EvaluationTask) -> EvaluationResponse:
"""
Run an grading job with generated results inline.