diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 48a433495..58d3c918a 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,7 +230,7 @@
}
}
},
- "/v1/eval/job/{job_id}/cancel": {
+ "/v1/eval/jobs/{job_id}/cancel": {
"post": {
"responses": {
"200": {
@@ -280,7 +280,7 @@
]
}
},
- "/v1/scoring/job/{job_id}/cancel": {
+ "/v1/scoring/jobs/{job_id}/cancel": {
"post": {
"responses": {
"200": {
@@ -923,7 +923,7 @@
]
}
},
- "/v1/eval/job/{job_id}": {
+ "/v1/eval/jobs/{job_id}": {
"get": {
"responses": {
"200": {
@@ -1123,7 +1123,7 @@
]
}
},
- "/v1/scoring/job/{job_id}": {
+ "/v1/scoring/jobs/{job_id}": {
"get": {
"responses": {
"200": {
@@ -5160,26 +5160,36 @@
},
"type": {
"type": "string",
- "const": "eval",
- "default": "eval"
+ "enum": [
+ "batch_inference",
+ "scoring",
+ "evaluation",
+ "post_training"
+ ],
+ "default": "evaluation",
+ "description": "The type of the job."
},
"result_files": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The file ids of the eval results."
},
"result_datasets": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The ids of the datasets containing the eval results."
},
"benchmark_id": {
- "type": "string"
+ "type": "string",
+ "description": "The id of the benchmark to evaluate on."
},
"candidate": {
- "$ref": "#/components/schemas/EvalCandidate"
+ "$ref": "#/components/schemas/EvalCandidate",
+ "description": "The candidate to evaluate on."
}
},
"additionalProperties": false,
@@ -5193,7 +5203,8 @@
"benchmark_id",
"candidate"
],
- "title": "EvalJob"
+ "title": "EvalJob",
+ "description": "An evaluation job."
},
"ModelCandidate": {
"type": "object",
@@ -5399,29 +5410,39 @@
},
"type": {
"type": "string",
- "const": "scoring",
- "default": "scoring"
+ "enum": [
+ "batch_inference",
+ "scoring",
+ "evaluation",
+ "post_training"
+ ],
+ "default": "scoring",
+ "description": "The type of the job."
},
"result_files": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The file ids of the scoring results."
},
"result_datasets": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The ids of the datasets containing the scoring results."
},
"dataset_id": {
- "type": "string"
+ "type": "string",
+ "description": "The id of the dataset used for scoring."
},
"scoring_fn_ids": {
"type": "array",
"items": {
"type": "string"
- }
+ },
+ "description": "The ids of the scoring functions used."
}
},
"additionalProperties": false,
@@ -5435,7 +5456,8 @@
"dataset_id",
"scoring_fn_ids"
],
- "title": "ScoringJob"
+ "title": "ScoringJob",
+ "description": "A scoring job."
},
"CancelTrainingJobRequest": {
"type": "object",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 45058fbdc..8220cf5e7 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,7 +142,7 @@ paths:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
- /v1/eval/job/{job_id}/cancel:
+ /v1/eval/jobs/{job_id}/cancel:
post:
responses:
'200':
@@ -173,7 +173,7 @@ paths:
required: true
schema:
type: string
- /v1/scoring/job/{job_id}/cancel:
+ /v1/scoring/jobs/{job_id}/cancel:
post:
responses:
'200':
@@ -622,7 +622,7 @@ paths:
required: true
schema:
type: string
- /v1/eval/job/{job_id}:
+ /v1/eval/jobs/{job_id}:
get:
responses:
'200':
@@ -756,7 +756,7 @@ paths:
required: true
schema:
type: string
- /v1/scoring/job/{job_id}:
+ /v1/scoring/jobs/{job_id}:
get:
responses:
'200':
@@ -3514,20 +3514,30 @@ components:
If status of the job is failed, this will contain the error message.
type:
type: string
- const: eval
- default: eval
+ enum:
+ - batch_inference
+ - scoring
+ - evaluation
+ - post_training
+ default: evaluation
+ description: The type of the job.
result_files:
type: array
items:
type: string
+ description: The file ids of the eval results.
result_datasets:
type: array
items:
type: string
+ description: >-
+ The ids of the datasets containing the eval results.
benchmark_id:
type: string
+ description: The id of the benchmark to evaluate on.
candidate:
$ref: '#/components/schemas/EvalCandidate'
+ description: The candidate to evaluate on.
additionalProperties: false
required:
- id
@@ -3539,6 +3549,7 @@ components:
- benchmark_id
- candidate
title: EvalJob
+ description: An evaluation job.
ModelCandidate:
type: object
properties:
@@ -3693,22 +3704,32 @@ components:
If status of the job is failed, this will contain the error message.
type:
type: string
- const: scoring
+ enum:
+ - batch_inference
+ - scoring
+ - evaluation
+ - post_training
default: scoring
+ description: The type of the job.
result_files:
type: array
items:
type: string
+ description: The file ids of the scoring results.
result_datasets:
type: array
items:
type: string
+ description: >-
+ The ids of the datasets containing the scoring results.
dataset_id:
type: string
+ description: The id of the dataset used for scoring.
scoring_fn_ids:
type: array
items:
type: string
+ description: The ids of the scoring functions used.
additionalProperties: false
required:
- id
@@ -3720,6 +3741,7 @@ components:
- dataset_id
- scoring_fn_ids
title: ScoringJob
+ description: A scoring job.
CancelTrainingJobRequest:
type: object
properties:
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index d98c9cb87..1d971ab81 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
+from llama_stack.apis.common.job_types import CommonJobFields, JobType
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
@json_schema_type
class EvalJob(CommonJobFields):
- type: Literal["eval"] = "eval"
+ """
+ An evaluation job.
+
+ :param type: The type of the job.
+ :param result_files: The file ids of the eval results.
+ :param result_datasets: The ids of the datasets containing the eval results.
+ :param benchmark_id: The id of the benchmark to evaluate on.
+ :param candidate: The candidate to evaluate on.
+ """
+
+ type: JobType = JobType.evaluation  # enum member, not `.value`: pydantic does not validate defaults, so a str default would break the JobType annotation
result_files: List[str] = Field(
description="The file ids of the eval results.",
default_factory=list,
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index d54b34491..961598e35 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
@json_schema_type
class ScoringJob(CommonJobFields):
- type: Literal["scoring"] = "scoring"
+ """
+ A scoring job.
+
+ :param type: The type of the job.
+ :param result_files: The file ids of the scoring results.
+ :param result_datasets: The ids of the datasets containing the scoring results.
+ :param dataset_id: The id of the dataset used for scoring.
+ :param scoring_fn_ids: The ids of the scoring functions used.
+ """
+
+ type: JobType = JobType.scoring  # enum member, not `.value`: pydantic does not validate defaults, so a str default would break the JobType annotation
result_files: List[str] = Field(
description="The file ids of the scoring results.",