mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-10 04:08:31 +00:00
jobs eval scoring
This commit is contained in:
parent
36320728bf
commit
775e8514b7
4 changed files with 92 additions and 28 deletions
58
docs/_static/llama-stack-spec.html
vendored
58
docs/_static/llama-stack-spec.html
vendored
|
@ -230,7 +230,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/eval/job/{job_id}/cancel": {
|
"/v1/eval/jobs/{job_id}/cancel": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -280,7 +280,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/scoring/job/{job_id}/cancel": {
|
"/v1/scoring/jobs/{job_id}/cancel": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -923,7 +923,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/eval/job/{job_id}": {
|
"/v1/eval/jobs/{job_id}": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -1123,7 +1123,7 @@
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"/v1/scoring/job/{job_id}": {
|
"/v1/scoring/jobs/{job_id}": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
"200": {
|
"200": {
|
||||||
|
@ -5160,26 +5160,36 @@
|
||||||
},
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "eval",
|
"enum": [
|
||||||
"default": "eval"
|
"batch_inference",
|
||||||
|
"scoring",
|
||||||
|
"evaluation",
|
||||||
|
"post_training"
|
||||||
|
],
|
||||||
|
"default": "evaluation",
|
||||||
|
"description": "The type of the job."
|
||||||
},
|
},
|
||||||
"result_files": {
|
"result_files": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The file ids of the eval results."
|
||||||
},
|
},
|
||||||
"result_datasets": {
|
"result_datasets": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The ids of the datasets containing the eval results."
|
||||||
},
|
},
|
||||||
"benchmark_id": {
|
"benchmark_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The id of the benchmark to evaluate on."
|
||||||
},
|
},
|
||||||
"candidate": {
|
"candidate": {
|
||||||
"$ref": "#/components/schemas/EvalCandidate"
|
"$ref": "#/components/schemas/EvalCandidate",
|
||||||
|
"description": "The candidate to evaluate on."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5193,7 +5203,8 @@
|
||||||
"benchmark_id",
|
"benchmark_id",
|
||||||
"candidate"
|
"candidate"
|
||||||
],
|
],
|
||||||
"title": "EvalJob"
|
"title": "EvalJob",
|
||||||
|
"description": "An evaluation job."
|
||||||
},
|
},
|
||||||
"ModelCandidate": {
|
"ModelCandidate": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
@ -5399,29 +5410,39 @@
|
||||||
},
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"const": "scoring",
|
"enum": [
|
||||||
"default": "scoring"
|
"batch_inference",
|
||||||
|
"scoring",
|
||||||
|
"evaluation",
|
||||||
|
"post_training"
|
||||||
|
],
|
||||||
|
"default": "scoring",
|
||||||
|
"description": "The type of the job."
|
||||||
},
|
},
|
||||||
"result_files": {
|
"result_files": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The file ids of the scoring results."
|
||||||
},
|
},
|
||||||
"result_datasets": {
|
"result_datasets": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The ids of the datasets containing the scoring results."
|
||||||
},
|
},
|
||||||
"dataset_id": {
|
"dataset_id": {
|
||||||
"type": "string"
|
"type": "string",
|
||||||
|
"description": "The id of the dataset used for scoring."
|
||||||
},
|
},
|
||||||
"scoring_fn_ids": {
|
"scoring_fn_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
},
|
||||||
|
"description": "The ids of the scoring functions used."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
|
@ -5435,7 +5456,8 @@
|
||||||
"dataset_id",
|
"dataset_id",
|
||||||
"scoring_fn_ids"
|
"scoring_fn_ids"
|
||||||
],
|
],
|
||||||
"title": "ScoringJob"
|
"title": "ScoringJob",
|
||||||
|
"description": "A scoring job."
|
||||||
},
|
},
|
||||||
"CancelTrainingJobRequest": {
|
"CancelTrainingJobRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
36
docs/_static/llama-stack-spec.yaml
vendored
36
docs/_static/llama-stack-spec.yaml
vendored
|
@ -142,7 +142,7 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/eval/job/{job_id}/cancel:
|
/v1/eval/jobs/{job_id}/cancel:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -173,7 +173,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
/v1/scoring/job/{job_id}/cancel:
|
/v1/scoring/jobs/{job_id}/cancel:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -622,7 +622,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
/v1/eval/job/{job_id}:
|
/v1/eval/jobs/{job_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -756,7 +756,7 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
/v1/scoring/job/{job_id}:
|
/v1/scoring/jobs/{job_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
|
@ -3514,20 +3514,30 @@ components:
|
||||||
If status of the job is failed, this will contain the error message.
|
If status of the job is failed, this will contain the error message.
|
||||||
type:
|
type:
|
||||||
type: string
|
type: string
|
||||||
const: eval
|
enum:
|
||||||
default: eval
|
- batch_inference
|
||||||
|
- scoring
|
||||||
|
- evaluation
|
||||||
|
- post_training
|
||||||
|
default: evaluation
|
||||||
|
description: The type of the job.
|
||||||
result_files:
|
result_files:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: The file ids of the eval results.
|
||||||
result_datasets:
|
result_datasets:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ids of the datasets containing the eval results.
|
||||||
benchmark_id:
|
benchmark_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The id of the benchmark to evaluate on.
|
||||||
candidate:
|
candidate:
|
||||||
$ref: '#/components/schemas/EvalCandidate'
|
$ref: '#/components/schemas/EvalCandidate'
|
||||||
|
description: The candidate to evaluate on.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- id
|
- id
|
||||||
|
@ -3539,6 +3549,7 @@ components:
|
||||||
- benchmark_id
|
- benchmark_id
|
||||||
- candidate
|
- candidate
|
||||||
title: EvalJob
|
title: EvalJob
|
||||||
|
description: An evaluation job.
|
||||||
ModelCandidate:
|
ModelCandidate:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -3693,22 +3704,32 @@ components:
|
||||||
If status of the job is failed, this will contain the error message.
|
If status of the job is failed, this will contain the error message.
|
||||||
type:
|
type:
|
||||||
type: string
|
type: string
|
||||||
const: scoring
|
enum:
|
||||||
|
- batch_inference
|
||||||
|
- scoring
|
||||||
|
- evaluation
|
||||||
|
- post_training
|
||||||
default: scoring
|
default: scoring
|
||||||
|
description: The type of the job.
|
||||||
result_files:
|
result_files:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: The file ids of the scoring results.
|
||||||
result_datasets:
|
result_datasets:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ids of the datasets containing the scoring results.
|
||||||
dataset_id:
|
dataset_id:
|
||||||
type: string
|
type: string
|
||||||
|
description: The id of the dataset used for scoring.
|
||||||
scoring_fn_ids:
|
scoring_fn_ids:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
description: The ids of the scoring functions used.
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- id
|
- id
|
||||||
|
@ -3720,6 +3741,7 @@ components:
|
||||||
- dataset_id
|
- dataset_id
|
||||||
- scoring_fn_ids
|
- scoring_fn_ids
|
||||||
title: ScoringJob
|
title: ScoringJob
|
||||||
|
description: A scoring job.
|
||||||
CancelTrainingJobRequest:
|
CancelTrainingJobRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
from llama_stack.apis.agents import AgentConfig
|
from llama_stack.apis.agents import AgentConfig
|
||||||
from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
|
from llama_stack.apis.common.job_types import CommonJobFields, JobType
|
||||||
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
from llama_stack.apis.inference import SamplingParams, SystemMessage
|
||||||
from llama_stack.apis.scoring import ScoringResult
|
from llama_stack.apis.scoring import ScoringResult
|
||||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||||
|
@ -63,7 +63,17 @@ class EvaluateResponse(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class EvalJob(CommonJobFields):
|
class EvalJob(CommonJobFields):
|
||||||
type: Literal["eval"] = "eval"
|
"""
|
||||||
|
An evaluation job.
|
||||||
|
|
||||||
|
:param type: The type of the job.
|
||||||
|
:param result_files: The file ids of the eval results.
|
||||||
|
:param result_datasets: The ids of the datasets containing the eval results.
|
||||||
|
:param benchmark_id: The id of the benchmark to evaluate on.
|
||||||
|
:param candidate: The candidate to evaluate on.
|
||||||
|
"""
|
||||||
|
|
||||||
|
type: JobType = JobType.evaluation.value
|
||||||
result_files: List[str] = Field(
|
result_files: List[str] = Field(
|
||||||
description="The file ids of the eval results.",
|
description="The file ids of the eval results.",
|
||||||
default_factory=list,
|
default_factory=list,
|
||||||
|
|
|
@ -50,7 +50,17 @@ class ScoreResponse(BaseModel):
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class ScoringJob(CommonJobFields):
|
class ScoringJob(CommonJobFields):
|
||||||
type: Literal["scoring"] = "scoring"
|
"""
|
||||||
|
A scoring job.
|
||||||
|
|
||||||
|
:param type: The type of the job.
|
||||||
|
:param result_files: The file ids of the scoring results.
|
||||||
|
:param result_datasets: The ids of the datasets containing the scoring results.
|
||||||
|
:param dataset_id: The id of the dataset used for scoring.
|
||||||
|
:param scoring_fn_ids: The ids of the scoring functions used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
type: JobType = JobType.scoring.value
|
||||||
|
|
||||||
result_files: List[str] = Field(
|
result_files: List[str] = Field(
|
||||||
description="The file ids of the scoring results.",
|
description="The file ids of the scoring results.",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue