mirror of
https://github.com/meta-llama/llama-stack.git
synced 2026-01-01 17:04:31 +00:00
scoring job
This commit is contained in:
parent
f88755eb93
commit
83d8777f56
2 changed files with 729 additions and 762 deletions
861
docs/_static/llama-stack-spec.html
vendored
861
docs/_static/llama-stack-spec.html
vendored
|
|
@ -230,6 +230,108 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmark/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "EvalJob object indicating its status",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/EvalJob"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Scoring"
|
||||
],
|
||||
"description": "Get the EvalJob object for a given job id and benchmark id.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the status of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Scoring"
|
||||
],
|
||||
"description": "Cancel a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/post-training/job/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
|
@ -968,7 +1070,60 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/evaluations": {
|
||||
"/v1/eval/benchmark/{benchmark_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The job that was created to run the evaluation.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvalJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Run an evaluation on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateBenchmarkRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/rows": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
|
@ -997,18 +1152,8 @@
|
|||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Evaluate a list of rows on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Evaluate a list of rows on a candidate.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
|
|
@ -2194,160 +2339,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The status of the evaluation job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/JobStatus"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Get the status of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the status of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"delete": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Cancel a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The result of the job.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/EvaluateResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Get the result of a job.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "job_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the job to get the result of.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/sessions": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
|
@ -3430,59 +3421,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The job that was created to run the evaluation.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Eval"
|
||||
],
|
||||
"description": "Run an evaluation on a benchmark.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "benchmark_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the benchmark to run the evaluation on.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RunEvalRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/safety/run-shield": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
|
@ -3562,7 +3500,50 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/scoring/score": {
|
||||
"/v1/scoring/jobs": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ScoringJob"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Scoring"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ScoreDatasetRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/scoring/rows": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
|
@ -3597,50 +3578,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ScoreRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/scoring/score-batch": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ScoreBatchResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Scoring"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ScoreBatchRequest"
|
||||
"$ref": "#/components/schemas/ScoreRowsRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -6347,6 +6285,122 @@
|
|||
"title": "AgentCandidate",
|
||||
"description": "An agent candidate for evaluation."
|
||||
},
|
||||
"EvalCandidate": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ModelCandidate"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"model": "#/components/schemas/ModelCandidate",
|
||||
"agent": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
}
|
||||
},
|
||||
"ModelCandidate": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "model",
|
||||
"default": "model"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model ID to evaluate."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "The sampling parameters for the model."
|
||||
},
|
||||
"system_message": {
|
||||
"$ref": "#/components/schemas/SystemMessage",
|
||||
"description": "(Optional) The system message providing instructions or context to the model."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"model",
|
||||
"sampling_params"
|
||||
],
|
||||
"title": "ModelCandidate",
|
||||
"description": "A model candidate for evaluation."
|
||||
},
|
||||
"EvaluateBenchmarkRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate",
|
||||
"description": "Candidate to evaluate on. Examples: { \"type\": \"model\", \"model\": \"Llama-3.1-8B-Instruct\", \"sampling_params\": {...}, \"system_message\": \"You are a helpful assistant.\" } or { \"type\": \"agent\", \"config\": {...} }"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"candidate"
|
||||
],
|
||||
"title": "EvaluateBenchmarkRequest"
|
||||
},
|
||||
"EvalJob": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the job."
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled",
|
||||
"cancelled"
|
||||
],
|
||||
"description": "The status of the job."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The time the job was created."
|
||||
},
|
||||
"finished_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The time the job finished."
|
||||
},
|
||||
"error": {
|
||||
"type": "string",
|
||||
"description": "If status of the job is failed, this will contain the error message."
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "eval",
|
||||
"default": "eval"
|
||||
},
|
||||
"result_files": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"status",
|
||||
"created_at",
|
||||
"type",
|
||||
"result_files"
|
||||
],
|
||||
"title": "EvalJob",
|
||||
"description": "The EvalJob object representing an evaluation job that was created through the API."
|
||||
},
|
||||
"AggregationFunctionType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
|
|
@ -6424,33 +6478,6 @@
|
|||
],
|
||||
"title": "AnswerSimilarityScoringFnParams"
|
||||
},
|
||||
"BenchmarkConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"eval_candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate",
|
||||
"description": "The candidate to evaluate."
|
||||
},
|
||||
"scoring_params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "Map between scoring function id and parameters for each scoring function you want to run"
|
||||
},
|
||||
"num_examples": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"eval_candidate",
|
||||
"scoring_params"
|
||||
],
|
||||
"title": "BenchmarkConfig",
|
||||
"description": "A benchmark configuration for evaluation."
|
||||
},
|
||||
"ContextEntityRecallScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -6561,23 +6588,6 @@
|
|||
],
|
||||
"title": "EqualityScoringFnParams"
|
||||
},
|
||||
"EvalCandidate": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ModelCandidate"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"model": "#/components/schemas/ModelCandidate",
|
||||
"agent": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
}
|
||||
},
|
||||
"FactualityScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -6656,36 +6666,6 @@
|
|||
],
|
||||
"title": "LLMAsJudgeScoringFnParams"
|
||||
},
|
||||
"ModelCandidate": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "model",
|
||||
"default": "model"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model ID to evaluate."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "The sampling parameters for the model."
|
||||
},
|
||||
"system_message": {
|
||||
"$ref": "#/components/schemas/SystemMessage",
|
||||
"description": "(Optional) The system message providing instructions or context to the model."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"model",
|
||||
"sampling_params"
|
||||
],
|
||||
"title": "ModelCandidate",
|
||||
"description": "A model candidate for evaluation."
|
||||
},
|
||||
"RegexParserMathScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -6836,7 +6816,7 @@
|
|||
"EvaluateRowsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_rows": {
|
||||
"dataset_rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
|
|
@ -6868,20 +6848,20 @@
|
|||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "The scoring functions to use for the evaluation."
|
||||
},
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
"candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate",
|
||||
"description": "The candidate to evaluate on."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_rows",
|
||||
"dataset_rows",
|
||||
"scoring_functions",
|
||||
"benchmark_config"
|
||||
"candidate"
|
||||
],
|
||||
"title": "EvaluateRowsRequest"
|
||||
},
|
||||
|
|
@ -7941,16 +7921,6 @@
|
|||
"title": "PostTrainingJobArtifactsResponse",
|
||||
"description": "Artifacts of a finetuning job."
|
||||
},
|
||||
"JobStatus": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled"
|
||||
],
|
||||
"title": "JobStatus"
|
||||
},
|
||||
"PostTrainingJobStatusResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -7958,7 +7928,15 @@
|
|||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"$ref": "#/components/schemas/JobStatus"
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled",
|
||||
"cancelled"
|
||||
],
|
||||
"title": "JobStatus"
|
||||
},
|
||||
"scheduled_at": {
|
||||
"type": "string",
|
||||
|
|
@ -9796,33 +9774,6 @@
|
|||
],
|
||||
"title": "ResumeAgentTurnRequest"
|
||||
},
|
||||
"RunEvalRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"benchmark_config"
|
||||
],
|
||||
"title": "RunEvalRequest"
|
||||
},
|
||||
"Job": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"job_id": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"job_id"
|
||||
],
|
||||
"title": "Job"
|
||||
},
|
||||
"RunShieldRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9909,7 +9860,82 @@
|
|||
],
|
||||
"title": "SaveSpansToDatasetRequest"
|
||||
},
|
||||
"ScoreRequest": {
|
||||
"ScoreDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"scoring_functions"
|
||||
],
|
||||
"title": "ScoreDatasetRequest"
|
||||
},
|
||||
"ScoringJob": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the job."
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled",
|
||||
"cancelled"
|
||||
],
|
||||
"description": "The status of the job."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The time the job was created."
|
||||
},
|
||||
"finished_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "The time the job finished."
|
||||
},
|
||||
"error": {
|
||||
"type": "string",
|
||||
"description": "If status of the job is failed, this will contain the error message."
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "scoring",
|
||||
"default": "scoring"
|
||||
},
|
||||
"result_files": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"status",
|
||||
"created_at",
|
||||
"type",
|
||||
"result_files"
|
||||
],
|
||||
"title": "ScoringJob",
|
||||
"description": "The ScoringJob object representing a scoring job that was created through the API."
|
||||
},
|
||||
"ScoreRowsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_rows": {
|
||||
|
|
@ -9942,16 +9968,9 @@
|
|||
"description": "The rows to score."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "The scoring functions to use for the scoring."
|
||||
}
|
||||
|
|
@ -9961,7 +9980,7 @@
|
|||
"input_rows",
|
||||
"scoring_functions"
|
||||
],
|
||||
"title": "ScoreRequest"
|
||||
"title": "ScoreRowsRequest"
|
||||
},
|
||||
"ScoreResponse": {
|
||||
"type": "object",
|
||||
|
|
@ -9981,56 +10000,6 @@
|
|||
"title": "ScoreResponse",
|
||||
"description": "The response from scoring."
|
||||
},
|
||||
"ScoreBatchRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"save_results_dataset": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"scoring_functions",
|
||||
"save_results_dataset"
|
||||
],
|
||||
"title": "ScoreBatchRequest"
|
||||
},
|
||||
"ScoreBatchResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"results": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"results"
|
||||
],
|
||||
"title": "ScoreBatchResponse"
|
||||
},
|
||||
"AlgorithmConfig": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue