post training job api

This commit is contained in:
Xi Yan 2025-03-12 01:29:18 -07:00
parent 961528eae1
commit dd9e1420da
3 changed files with 133 additions and 308 deletions

View file

@@ -332,8 +332,55 @@
] ]
} }
}, },
"/v1/post-training/job/cancel": { "/v1/post-training/jobs/{job_id}": {
"post": { "get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/PostTrainingJob"
},
{
"type": "null"
}
]
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"description": "",
"parameters": [
{
"name": "job_id",
"in": "path",
"required": true,
"schema": {
"type": "string"
}
}
]
},
"delete": {
"responses": { "responses": {
"200": { "200": {
"description": "OK" "description": "OK"
@@ -355,17 +402,16 @@
"PostTraining (Coming Soon)" "PostTraining (Coming Soon)"
], ],
"description": "", "description": "",
"parameters": [], "parameters": [
"requestBody": { {
"content": { "name": "job_id",
"application/json": { "in": "path",
"schema": { "required": true,
"$ref": "#/components/schemas/CancelTrainingJobRequest" "schema": {
} "type": "string"
} }
}, }
"required": true ]
}
} }
}, },
"/v1/inference/chat-completion": { "/v1/inference/chat-completion": {
@@ -1869,104 +1915,6 @@
] ]
} }
}, },
"/v1/post-training/job/artifacts": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/PostTrainingJobArtifactsResponse"
},
{
"type": "null"
}
]
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"description": "",
"parameters": [
{
"name": "job_uuid",
"in": "query",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/post-training/job/status": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"oneOf": [
{
"$ref": "#/components/schemas/PostTrainingJobStatusResponse"
},
{
"type": "null"
}
]
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"PostTraining (Coming Soon)"
],
"description": "",
"parameters": [
{
"name": "job_uuid",
"in": "query",
"required": true,
"schema": {
"type": "string"
}
}
]
}
},
"/v1/post-training/jobs": { "/v1/post-training/jobs": {
"get": { "get": {
"responses": { "responses": {
@@ -3130,7 +3078,7 @@
} }
} }
}, },
"/v1/post-training/preference-optimize": { "/v1/post-training/preference-optimize/jobs": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@@ -3586,7 +3534,7 @@
} }
} }
}, },
"/v1/post-training/supervised-fine-tune": { "/v1/post-training/supervised-fine-tune/jobs": {
"post": { "post": {
"responses": { "responses": {
"200": { "200": {
@@ -4703,19 +4651,6 @@
"title": "CompletionResponse", "title": "CompletionResponse",
"description": "Response from a completion request." "description": "Response from a completion request."
}, },
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"job_uuid"
],
"title": "CancelTrainingJobRequest"
},
"ToolConfig": { "ToolConfig": {
"type": "object", "type": "object",
"properties": { "properties": {
@@ -7900,32 +7835,12 @@
"description": "Checkpoint created during training runs", "description": "Checkpoint created during training runs",
"title": "Checkpoint" "title": "Checkpoint"
}, },
"PostTrainingJobArtifactsResponse": { "PostTrainingJob": {
"type": "object", "type": "object",
"properties": { "properties": {
"job_uuid": { "id": {
"type": "string" "type": "string",
}, "description": "The ID of the job."
"checkpoints": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Checkpoint"
}
}
},
"additionalProperties": false,
"required": [
"job_uuid",
"checkpoints"
],
"title": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job."
},
"PostTrainingJobStatusResponse": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string"
}, },
"status": { "status": {
"type": "string", "type": "string",
@@ -7936,19 +7851,21 @@
"scheduled", "scheduled",
"cancelled" "cancelled"
], ],
"title": "JobStatus" "description": "The status of the job."
}, },
"scheduled_at": { "created_at": {
"type": "string", "type": "string",
"format": "date-time" "format": "date-time",
"description": "The time the job was created."
}, },
"started_at": { "finished_at": {
"type": "string", "type": "string",
"format": "date-time" "format": "date-time",
"description": "The time the job finished."
}, },
"completed_at": { "error": {
"type": "string", "type": "string",
"format": "date-time" "description": "If status of the job is failed, this will contain the error message."
}, },
"resources_allocated": { "resources_allocated": {
"type": "object", "type": "object",
@@ -7984,12 +7901,12 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"job_uuid", "id",
"status", "status",
"created_at",
"checkpoints" "checkpoints"
], ],
"title": "PostTrainingJobStatusResponse", "title": "PostTrainingJob"
"description": "Status of a finetuning job."
}, },
"ListPostTrainingJobsResponse": { "ListPostTrainingJobsResponse": {
"type": "object", "type": "object",
@@ -7997,17 +7914,7 @@
"data": { "data": {
"type": "array", "type": "array",
"items": { "items": {
"type": "object", "$ref": "#/components/schemas/PostTrainingJob"
"properties": {
"job_uuid": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"job_uuid"
],
"title": "PostTrainingJob"
} }
} }
}, },
@@ -9042,19 +8949,6 @@
], ],
"title": "PreferenceOptimizeRequest" "title": "PreferenceOptimizeRequest"
}, },
"PostTrainingJob": {
"type": "object",
"properties": {
"job_uuid": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"job_uuid"
],
"title": "PostTrainingJob"
},
"DefaultRAGQueryGeneratorConfig": { "DefaultRAGQueryGeneratorConfig": {
"type": "object", "type": "object",
"properties": { "properties": {

View file

@@ -212,8 +212,37 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/post-training/job/cancel: /v1/post-training/jobs/{job_id}:
post: get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/PostTrainingJob'
- type: 'null'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
parameters:
- name: job_id
in: path
required: true
schema:
type: string
delete:
responses: responses:
'200': '200':
description: OK description: OK
@@ -230,13 +259,12 @@ paths:
tags: tags:
- PostTraining (Coming Soon) - PostTraining (Coming Soon)
description: '' description: ''
parameters: [] parameters:
requestBody: - name: job_id
content: in: path
application/json: required: true
schema: schema:
$ref: '#/components/schemas/CancelTrainingJobRequest' type: string
required: true
/v1/inference/chat-completion: /v1/inference/chat-completion:
post: post:
responses: responses:
@@ -1263,66 +1291,6 @@ paths:
required: true required: true
schema: schema:
type: string type: string
/v1/post-training/job/artifacts:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
- type: 'null'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
parameters:
- name: job_uuid
in: query
required: true
schema:
type: string
/v1/post-training/job/status:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
- type: 'null'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- PostTraining (Coming Soon)
description: ''
parameters:
- name: job_uuid
in: query
required: true
schema:
type: string
/v1/post-training/jobs: /v1/post-training/jobs:
get: get:
responses: responses:
@@ -2119,7 +2087,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/LogEventRequest' $ref: '#/components/schemas/LogEventRequest'
required: true required: true
/v1/post-training/preference-optimize: /v1/post-training/preference-optimize/jobs:
post: post:
responses: responses:
'200': '200':
@@ -2435,7 +2403,7 @@ paths:
schema: schema:
$ref: '#/components/schemas/ScoreRowsRequest' $ref: '#/components/schemas/ScoreRowsRequest'
required: true required: true
/v1/post-training/supervised-fine-tune: /v1/post-training/supervised-fine-tune/jobs:
post: post:
responses: responses:
'200': '200':
@@ -3207,15 +3175,6 @@ components:
- stop_reason - stop_reason
title: CompletionResponse title: CompletionResponse
description: Response from a completion request. description: Response from a completion request.
CancelTrainingJobRequest:
type: object
properties:
job_uuid:
type: string
additionalProperties: false
required:
- job_uuid
title: CancelTrainingJobRequest
ToolConfig: ToolConfig:
type: object type: object
properties: properties:
@@ -5452,26 +5411,12 @@ components:
Checkpoint: Checkpoint:
description: Checkpoint created during training runs description: Checkpoint created during training runs
title: Checkpoint title: Checkpoint
PostTrainingJobArtifactsResponse: PostTrainingJob:
type: object type: object
properties: properties:
job_uuid: id:
type: string
checkpoints:
type: array
items:
$ref: '#/components/schemas/Checkpoint'
additionalProperties: false
required:
- job_uuid
- checkpoints
title: PostTrainingJobArtifactsResponse
description: Artifacts of a finetuning job.
PostTrainingJobStatusResponse:
type: object
properties:
job_uuid:
type: string type: string
description: The ID of the job.
status: status:
type: string type: string
enum: enum:
@@ -5480,16 +5425,19 @@ components:
- failed - failed
- scheduled - scheduled
- cancelled - cancelled
title: JobStatus description: The status of the job.
scheduled_at: created_at:
type: string type: string
format: date-time format: date-time
started_at: description: The time the job was created.
finished_at:
type: string type: string
format: date-time format: date-time
completed_at: description: The time the job finished.
error:
type: string type: string
format: date-time description: >-
If status of the job is failed, this will contain the error message.
resources_allocated: resources_allocated:
type: object type: object
additionalProperties: additionalProperties:
@@ -5506,25 +5454,18 @@ components:
$ref: '#/components/schemas/Checkpoint' $ref: '#/components/schemas/Checkpoint'
additionalProperties: false additionalProperties: false
required: required:
- job_uuid - id
- status - status
- created_at
- checkpoints - checkpoints
title: PostTrainingJobStatusResponse title: PostTrainingJob
description: Status of a finetuning job.
ListPostTrainingJobsResponse: ListPostTrainingJobsResponse:
type: object type: object
properties: properties:
data: data:
type: array type: array
items: items:
type: object $ref: '#/components/schemas/PostTrainingJob'
properties:
job_uuid:
type: string
additionalProperties: false
required:
- job_uuid
title: PostTrainingJob
additionalProperties: false additionalProperties: false
required: required:
- data - data
@@ -6192,15 +6133,6 @@ components:
- hyperparam_search_config - hyperparam_search_config
- logger_config - logger_config
title: PreferenceOptimizeRequest title: PreferenceOptimizeRequest
PostTrainingJob:
type: object
properties:
job_uuid:
type: string
additionalProperties: false
required:
- job_uuid
title: PostTrainingJob
DefaultRAGQueryGeneratorConfig: DefaultRAGQueryGeneratorConfig:
type: object type: object
properties: properties:

View file

@@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from datetime import datetime
from enum import Enum from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Protocol, Union from typing import Any, Dict, List, Literal, Optional, Protocol, Union
@@ -12,7 +11,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated from typing_extensions import Annotated
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobCommonFields, JobStatus from llama_stack.apis.common.job_types import CommonJobFields
from llama_stack.apis.common.training_types import Checkpoint from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -140,7 +139,7 @@ class PostTrainingRLHFRequest(BaseModel):
@json_schema_type @json_schema_type
class PostTrainingJob(JobCommonFields): class PostTrainingJob(CommonJobFields):
resources_allocated: Optional[Dict[str, Any]] = None resources_allocated: Optional[Dict[str, Any]] = None
checkpoints: List[Checkpoint] = Field(default_factory=list) checkpoints: List[Checkpoint] = Field(default_factory=list)