diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index c4430c8d0..48a433495 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -230,6 +230,106 @@
}
}
},
+ "/v1/eval/job/{job_id}/cancel": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/EvalJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "description": "Cancel a job.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to cancel.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "/v1/scoring/job/{job_id}/cancel": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ScoringJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "description": "Cancel a job.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to cancel.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/post-training/job/cancel": {
"post": {
"responses": {
@@ -823,6 +923,104 @@
]
}
},
+ "/v1/eval/job/{job_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "The job.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/EvalJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "description": "Get a job by id.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to get.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/EvalJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "description": "Delete a job.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to delete.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/files/{bucket}/{key}": {
"get": {
"responses": {
@@ -925,6 +1123,104 @@
]
}
},
+ "/v1/scoring/job/{job_id}": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "The job.",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ScoringJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "description": "Get a job by id.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to get.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ },
+ "delete": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ScoringJob"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "description": "Delete a job.",
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "path",
+ "description": "The ID of the job to delete.",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/v1/inference/embeddings": {
"post": {
"responses": {
@@ -968,7 +1264,38 @@
}
}
},
- "/v1/eval/benchmarks/{benchmark_id}/jobs": {
+ "/v1/eval/jobs": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "A list of evaluation jobs.",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/EvalJob"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "description": "List all evaluation jobs.",
+ "parameters": []
+ },
"post": {
"responses": {
"200": {
@@ -976,7 +1303,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/Job"
+ "$ref": "#/components/schemas/EvalJob"
}
}
}
@@ -998,17 +1325,7 @@
"Eval"
],
"description": "Run an evaluation on a benchmark.",
- "parameters": [
- {
- "name": "benchmark_id",
- "in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
+ "parameters": [],
"requestBody": {
"content": {
"application/json": {
@@ -2272,160 +2589,6 @@
}
}
},
- "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
- "get": {
- "responses": {
- "200": {
- "description": "The status of the evaluationjob.",
- "content": {
- "application/json": {
- "schema": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/JobStatus"
- },
- {
- "type": "null"
- }
- ]
- }
- }
- }
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "Get the status of a job.",
- "parameters": [
- {
- "name": "benchmark_id",
- "in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "description": "The ID of the job to get the status of.",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ]
- },
- "delete": {
- "responses": {
- "200": {
- "description": "OK"
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "Cancel a job.",
- "parameters": [
- {
- "name": "benchmark_id",
- "in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "description": "The ID of the job to cancel.",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": {
- "get": {
- "responses": {
- "200": {
- "description": "The result of the job.",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateResponse"
- }
- }
- }
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "Eval"
- ],
- "description": "Get the result of a job.",
- "parameters": [
- {
- "name": "benchmark_id",
- "in": "path",
- "description": "The ID of the benchmark to run the evaluation on.",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "job_id",
- "in": "path",
- "description": "The ID of the job to get the result of.",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
"/v1/agents/{agent_id}/sessions": {
"get": {
"responses": {
@@ -2957,6 +3120,80 @@
}
}
},
+ "/v1/scoring/jobs": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "A list of scoring jobs.",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoringJob"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "description": "List all scoring jobs.",
+ "parameters": []
+ },
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoringJob"
+ }
+ }
+ }
+ },
+ "400": {
+ "$ref": "#/components/responses/BadRequest400"
+ },
+ "429": {
+ "$ref": "#/components/responses/TooManyRequests429"
+ },
+ "500": {
+ "$ref": "#/components/responses/InternalServerError500"
+ },
+ "default": {
+ "$ref": "#/components/responses/DefaultError"
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "description": "Create a scoring job for a dataset.",
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoreDatasetRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/v1/shields": {
"get": {
"responses": {
@@ -3663,49 +3900,6 @@
}
}
},
- "/v1/scoring/jobs": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/ScoreBatchResponse"
- }
- }
- }
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "Scoring"
- ],
- "description": "",
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/ScoreDatasetRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/v1/post-training/supervised-fine-tune": {
"post": {
"responses": {
@@ -4768,18 +4962,268 @@
"title": "CompletionResponse",
"description": "Response from a completion request."
},
- "CancelTrainingJobRequest": {
+ "AgentCandidate": {
"type": "object",
"properties": {
- "job_uuid": {
- "type": "string"
+ "type": {
+ "type": "string",
+ "const": "agent",
+ "default": "agent"
+ },
+ "config": {
+ "$ref": "#/components/schemas/AgentConfig",
+ "description": "The configuration for the agent candidate."
}
},
"additionalProperties": false,
"required": [
- "job_uuid"
+ "type",
+ "config"
],
- "title": "CancelTrainingJobRequest"
+ "title": "AgentCandidate",
+ "description": "An agent candidate for evaluation."
+ },
+ "AgentConfig": {
+ "type": "object",
+ "properties": {
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "input_shields": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "output_shields": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "toolgroups": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/AgentTool"
+ }
+ },
+ "client_tools": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ToolDef"
+ }
+ },
+ "tool_choice": {
+ "type": "string",
+ "enum": [
+ "auto",
+ "required",
+ "none"
+ ],
+ "title": "ToolChoice",
+ "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
+ "deprecated": true
+ },
+ "tool_prompt_format": {
+ "type": "string",
+ "enum": [
+ "json",
+ "function_tag",
+ "python_list"
+ ],
+ "title": "ToolPromptFormat",
+ "description": "Prompt format for calling custom / zero shot tools.",
+ "deprecated": true
+ },
+ "tool_config": {
+ "$ref": "#/components/schemas/ToolConfig"
+ },
+ "max_infer_iters": {
+ "type": "integer",
+ "default": 10
+ },
+ "model": {
+ "type": "string"
+ },
+ "instructions": {
+ "type": "string"
+ },
+ "enable_session_persistence": {
+ "type": "boolean",
+ "default": false
+ },
+ "response_format": {
+ "$ref": "#/components/schemas/ResponseFormat"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "model",
+ "instructions"
+ ],
+ "title": "AgentConfig"
+ },
+ "AgentTool": {
+ "oneOf": [
+ {
+ "type": "string"
+ },
+ {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "args": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "args"
+ ],
+ "title": "AgentToolGroupWithArgs"
+ }
+ ]
+ },
+ "EvalCandidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "model": "#/components/schemas/ModelCandidate",
+ "agent": "#/components/schemas/AgentCandidate"
+ }
+ }
+ },
+ "EvalJob": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The ID of the job."
+ },
+ "status": {
+ "type": "string",
+ "enum": [
+ "completed",
+ "in_progress",
+ "failed",
+ "scheduled",
+ "cancelled"
+ ],
+ "description": "The status of the job."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "The time the job was created."
+ },
+ "finished_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "The time the job finished."
+ },
+ "error": {
+ "type": "string",
+ "description": "If status of the job is failed, this will contain the error message."
+ },
+ "type": {
+ "type": "string",
+ "const": "eval",
+ "default": "eval"
+ },
+ "result_files": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "result_datasets": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "benchmark_id": {
+ "type": "string"
+ },
+ "candidate": {
+ "$ref": "#/components/schemas/EvalCandidate"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "status",
+ "created_at",
+ "type",
+ "result_files",
+ "result_datasets",
+ "benchmark_id",
+ "candidate"
+ ],
+ "title": "EvalJob"
+ },
+ "ModelCandidate": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "model",
+ "default": "model"
+ },
+ "model": {
+ "type": "string",
+ "description": "The model ID to evaluate."
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams",
+ "description": "The sampling parameters for the model."
+ },
+ "system_message": {
+ "$ref": "#/components/schemas/SystemMessage",
+ "description": "(Optional) The system message providing instructions or context to the model."
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "model",
+ "sampling_params"
+ ],
+ "title": "ModelCandidate",
+ "description": "A model candidate for evaluation."
},
"ToolConfig": {
"type": "object",
@@ -4826,6 +5270,186 @@
"title": "ToolConfig",
"description": "Configuration for tool use."
},
+ "ToolDef": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "parameters": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ToolParameter"
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name"
+ ],
+ "title": "ToolDef"
+ },
+ "ToolParameter": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "parameter_type": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "required": {
+ "type": "boolean",
+ "default": true
+ },
+ "default": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "parameter_type",
+ "description",
+ "required"
+ ],
+ "title": "ToolParameter"
+ },
+ "ScoringJob": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "The ID of the job."
+ },
+ "status": {
+ "type": "string",
+ "enum": [
+ "completed",
+ "in_progress",
+ "failed",
+ "scheduled",
+ "cancelled"
+ ],
+ "description": "The status of the job."
+ },
+ "created_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "The time the job was created."
+ },
+ "finished_at": {
+ "type": "string",
+ "format": "date-time",
+ "description": "The time the job finished."
+ },
+ "error": {
+ "type": "string",
+ "description": "If status of the job is failed, this will contain the error message."
+ },
+ "type": {
+ "type": "string",
+ "const": "scoring",
+ "default": "scoring"
+ },
+ "result_files": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "result_datasets": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "dataset_id": {
+ "type": "string"
+ },
+ "scoring_fn_ids": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "status",
+ "created_at",
+ "type",
+ "result_files",
+ "result_datasets",
+ "dataset_id",
+ "scoring_fn_ids"
+ ],
+ "title": "ScoringJob"
+ },
+ "CancelTrainingJobRequest": {
+ "type": "object",
+ "properties": {
+ "job_uuid": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_uuid"
+ ],
+ "title": "CancelTrainingJobRequest"
+ },
"ChatCompletionRequest": {
"type": "object",
"properties": {
@@ -5140,227 +5764,6 @@
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
- "AgentConfig": {
- "type": "object",
- "properties": {
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "input_shields": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "output_shields": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "toolgroups": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/AgentTool"
- }
- },
- "client_tools": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ToolDef"
- }
- },
- "tool_choice": {
- "type": "string",
- "enum": [
- "auto",
- "required",
- "none"
- ],
- "title": "ToolChoice",
- "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.",
- "deprecated": true
- },
- "tool_prompt_format": {
- "type": "string",
- "enum": [
- "json",
- "function_tag",
- "python_list"
- ],
- "title": "ToolPromptFormat",
- "description": "Prompt format for calling custom / zero shot tools.",
- "deprecated": true
- },
- "tool_config": {
- "$ref": "#/components/schemas/ToolConfig"
- },
- "max_infer_iters": {
- "type": "integer",
- "default": 10
- },
- "model": {
- "type": "string"
- },
- "instructions": {
- "type": "string"
- },
- "enable_session_persistence": {
- "type": "boolean",
- "default": false
- },
- "response_format": {
- "$ref": "#/components/schemas/ResponseFormat"
- }
- },
- "additionalProperties": false,
- "required": [
- "model",
- "instructions"
- ],
- "title": "AgentConfig"
- },
- "AgentTool": {
- "oneOf": [
- {
- "type": "string"
- },
- {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "args": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "name",
- "args"
- ],
- "title": "AgentToolGroupWithArgs"
- }
- ]
- },
- "ToolDef": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "description": {
- "type": "string"
- },
- "parameters": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ToolParameter"
- }
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "name"
- ],
- "title": "ToolDef"
- },
- "ToolParameter": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "parameter_type": {
- "type": "string"
- },
- "description": {
- "type": "string"
- },
- "required": {
- "type": "boolean",
- "default": true
- },
- "default": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- },
- "additionalProperties": false,
- "required": [
- "name",
- "parameter_type",
- "description",
- "required"
- ],
- "title": "ToolParameter"
- },
"CreateAgentRequest": {
"type": "object",
"properties": {
@@ -6335,77 +6738,13 @@
"title": "EmbeddingsResponse",
"description": "Response containing generated embeddings."
},
- "AgentCandidate": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "agent",
- "default": "agent"
- },
- "config": {
- "$ref": "#/components/schemas/AgentConfig",
- "description": "The configuration for the agent candidate."
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "config"
- ],
- "title": "AgentCandidate",
- "description": "An agent candidate for evaluation."
- },
- "EvalCandidate": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/ModelCandidate"
- },
- {
- "$ref": "#/components/schemas/AgentCandidate"
- }
- ],
- "discriminator": {
- "propertyName": "type",
- "mapping": {
- "model": "#/components/schemas/ModelCandidate",
- "agent": "#/components/schemas/AgentCandidate"
- }
- }
- },
- "ModelCandidate": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "model",
- "default": "model"
- },
- "model": {
- "type": "string",
- "description": "The model ID to evaluate."
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams",
- "description": "The sampling parameters for the model."
- },
- "system_message": {
- "$ref": "#/components/schemas/SystemMessage",
- "description": "(Optional) The system message providing instructions or context to the model."
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "model",
- "sampling_params"
- ],
- "title": "ModelCandidate",
- "description": "A model candidate for evaluation."
- },
"EvaluateBenchmarkRequest": {
"type": "object",
"properties": {
+ "benchmark_id": {
+ "type": "string",
+ "description": "The ID of the benchmark to run the evaluation on."
+ },
"candidate": {
"$ref": "#/components/schemas/EvalCandidate",
"description": "The candidate to evaluate on."
@@ -6413,23 +6752,11 @@
},
"additionalProperties": false,
"required": [
+ "benchmark_id",
"candidate"
],
"title": "EvaluateBenchmarkRequest"
},
- "Job": {
- "type": "object",
- "properties": {
- "job_id": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_id"
- ],
- "title": "Job"
- },
"EvaluateRowsRequest": {
"type": "object",
"properties": {
@@ -8163,16 +8490,6 @@
"title": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job."
},
- "JobStatus": {
- "type": "string",
- "enum": [
- "completed",
- "in_progress",
- "failed",
- "scheduled"
- ],
- "title": "JobStatus"
- },
"PostTrainingJobStatusResponse": {
"type": "object",
"properties": {
@@ -8180,7 +8497,15 @@
"type": "string"
},
"status": {
- "$ref": "#/components/schemas/JobStatus"
+ "type": "string",
+ "enum": [
+ "completed",
+ "in_progress",
+ "failed",
+ "scheduled",
+ "cancelled"
+ ],
+ "title": "JobStatus"
},
"scheduled_at": {
"type": "string",
@@ -10322,25 +10647,6 @@
],
"title": "ScoreDatasetRequest"
},
- "ScoreBatchResponse": {
- "type": "object",
- "properties": {
- "dataset_id": {
- "type": "string"
- },
- "results": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/components/schemas/ScoringResult"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "results"
- ],
- "title": "ScoreBatchResponse"
- },
"AlgorithmConfig": {
"oneOf": [
{
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index de24e41c6..45058fbdc 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -142,6 +142,68 @@ paths:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
+ /v1/eval/job/{job_id}/cancel:
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/EvalJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Eval
+ description: Cancel a job.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to cancel.
+ required: true
+ schema:
+ type: string
+ /v1/scoring/job/{job_id}/cancel:
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/ScoringJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Scoring
+ description: Cancel a job.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to cancel.
+ required: true
+ schema:
+ type: string
/v1/post-training/job/cancel:
post:
responses:
@@ -560,6 +622,67 @@ paths:
required: true
schema:
type: string
+ /v1/eval/job/{job_id}:
+ get:
+ responses:
+ '200':
+ description: The job.
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/EvalJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Eval
+ description: Get a job by id.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to get.
+ required: true
+ schema:
+ type: string
+ delete:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/EvalJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Eval
+ description: Delete a job.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to delete.
+ required: true
+ schema:
+ type: string
/v1/files/{bucket}/{key}:
get:
responses:
@@ -633,6 +756,67 @@ paths:
required: true
schema:
type: string
+ /v1/scoring/job/{job_id}:
+ get:
+ responses:
+ '200':
+ description: The job.
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/ScoringJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Scoring
+ description: Get a job by id.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to get.
+ required: true
+ schema:
+ type: string
+ delete:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/ScoringJob'
+ - type: 'null'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Scoring
+ description: Delete a job.
+ parameters:
+ - name: job_id
+ in: path
+ description: The ID of the job to delete.
+ required: true
+ schema:
+ type: string
/v1/inference/embeddings:
post:
responses:
@@ -666,7 +850,29 @@ paths:
schema:
$ref: '#/components/schemas/EmbeddingsRequest'
required: true
- /v1/eval/benchmarks/{benchmark_id}/jobs:
+ /v1/eval/jobs:
+ get:
+ responses:
+ '200':
+ description: A list of evaluation jobs.
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/EvalJob'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Eval
+ description: List all evaluation jobs.
+ parameters: []
post:
responses:
'200':
@@ -675,7 +881,7 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/Job'
+ $ref: '#/components/schemas/EvalJob'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
@@ -689,14 +895,7 @@ paths:
tags:
- Eval
description: Run an evaluation on a benchmark.
- parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
+ parameters: []
requestBody:
content:
application/json:
@@ -1529,111 +1728,6 @@ paths:
schema:
$ref: '#/components/schemas/InvokeToolRequest'
required: true
- /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
- get:
- responses:
- '200':
- description: The status of the evaluationjob.
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/JobStatus'
- - type: 'null'
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - Eval
- description: Get the status of a job.
- parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- description: The ID of the job to get the status of.
- required: true
- schema:
- type: string
- delete:
- responses:
- '200':
- description: OK
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - Eval
- description: Cancel a job.
- parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- description: The ID of the job to cancel.
- required: true
- schema:
- type: string
- /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
- get:
- responses:
- '200':
- description: The result of the job.
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluateResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - Eval
- description: Get the result of a job.
- parameters:
- - name: benchmark_id
- in: path
- description: >-
- The ID of the benchmark to run the evaluation on.
- required: true
- schema:
- type: string
- - name: job_id
- in: path
- description: The ID of the job to get the result of.
- required: true
- schema:
- type: string
/v1/agents/{agent_id}/sessions:
get:
responses:
@@ -2002,6 +2096,57 @@ paths:
schema:
$ref: '#/components/schemas/RegisterScoringFunctionRequest'
required: true
+ /v1/scoring/jobs:
+ get:
+ responses:
+ '200':
+ description: A list of scoring jobs.
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/ScoringJob'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Scoring
+ description: List all scoring jobs.
+ parameters: []
+ post:
+ responses:
+ '200':
+ description: OK
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoringJob'
+ '400':
+ $ref: '#/components/responses/BadRequest400'
+ '429':
+ $ref: >-
+ #/components/responses/TooManyRequests429
+ '500':
+ $ref: >-
+ #/components/responses/InternalServerError500
+ default:
+ $ref: '#/components/responses/DefaultError'
+ tags:
+ - Scoring
+ description: ''
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoreDatasetRequest'
+ required: true
/v1/shields:
get:
responses:
@@ -2491,35 +2636,6 @@ paths:
schema:
$ref: '#/components/schemas/ScoreRequest'
required: true
- /v1/scoring/jobs:
- post:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ScoreBatchResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - Scoring
- description: ''
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ScoreDatasetRequest'
- required: true
/v1/post-training/supervised-fine-tune:
post:
responses:
@@ -3259,15 +3375,195 @@ components:
- stop_reason
title: CompletionResponse
description: Response from a completion request.
- CancelTrainingJobRequest:
+ AgentCandidate:
type: object
properties:
- job_uuid:
+ type:
type: string
+ const: agent
+ default: agent
+ config:
+ $ref: '#/components/schemas/AgentConfig'
+ description: >-
+ The configuration for the agent candidate.
additionalProperties: false
required:
- - job_uuid
- title: CancelTrainingJobRequest
+ - type
+ - config
+ title: AgentCandidate
+ description: An agent candidate for evaluation.
+ AgentConfig:
+ type: object
+ properties:
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ input_shields:
+ type: array
+ items:
+ type: string
+ output_shields:
+ type: array
+ items:
+ type: string
+ toolgroups:
+ type: array
+ items:
+ $ref: '#/components/schemas/AgentTool'
+ client_tools:
+ type: array
+ items:
+ $ref: '#/components/schemas/ToolDef'
+ tool_choice:
+ type: string
+ enum:
+ - auto
+ - required
+ - none
+ title: ToolChoice
+ description: >-
+ Whether tool use is required or automatic. This is a hint to the model
+ which may not be followed. It depends on the Instruction Following capabilities
+ of the model.
+ deprecated: true
+ tool_prompt_format:
+ type: string
+ enum:
+ - json
+ - function_tag
+ - python_list
+ title: ToolPromptFormat
+ description: >-
+ Prompt format for calling custom / zero shot tools.
+ deprecated: true
+ tool_config:
+ $ref: '#/components/schemas/ToolConfig'
+ max_infer_iters:
+ type: integer
+ default: 10
+ model:
+ type: string
+ instructions:
+ type: string
+ enable_session_persistence:
+ type: boolean
+ default: false
+ response_format:
+ $ref: '#/components/schemas/ResponseFormat'
+ additionalProperties: false
+ required:
+ - model
+ - instructions
+ title: AgentConfig
+ AgentTool:
+ oneOf:
+ - type: string
+ - type: object
+ properties:
+ name:
+ type: string
+ args:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ - args
+ title: AgentToolGroupWithArgs
+ EvalCandidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ discriminator:
+ propertyName: type
+ mapping:
+ model: '#/components/schemas/ModelCandidate'
+ agent: '#/components/schemas/AgentCandidate'
+ EvalJob:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The ID of the job.
+ status:
+ type: string
+ enum:
+ - completed
+ - in_progress
+ - failed
+ - scheduled
+ - cancelled
+ description: The status of the job.
+ created_at:
+ type: string
+ format: date-time
+ description: The time the job was created.
+ finished_at:
+ type: string
+ format: date-time
+ description: The time the job finished.
+ error:
+ type: string
+ description: >-
+ If status of the job is failed, this will contain the error message.
+ type:
+ type: string
+ const: eval
+ default: eval
+ result_files:
+ type: array
+ items:
+ type: string
+ result_datasets:
+ type: array
+ items:
+ type: string
+ benchmark_id:
+ type: string
+ candidate:
+ $ref: '#/components/schemas/EvalCandidate'
+ additionalProperties: false
+ required:
+ - id
+ - status
+ - created_at
+ - type
+ - result_files
+ - result_datasets
+ - benchmark_id
+ - candidate
+ title: EvalJob
+ ModelCandidate:
+ type: object
+ properties:
+ type:
+ type: string
+ const: model
+ default: model
+ model:
+ type: string
+ description: The model ID to evaluate.
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ description: The sampling parameters for the model.
+ system_message:
+ $ref: '#/components/schemas/SystemMessage'
+ description: >-
+ (Optional) The system message providing instructions or context to the
+ model.
+ additionalProperties: false
+ required:
+ - type
+ - model
+ - sampling_params
+ title: ModelCandidate
+ description: A model candidate for evaluation.
ToolConfig:
type: object
properties:
@@ -3316,6 +3612,123 @@ components:
additionalProperties: false
title: ToolConfig
description: Configuration for tool use.
+ ToolDef:
+ type: object
+ properties:
+ name:
+ type: string
+ description:
+ type: string
+ parameters:
+ type: array
+ items:
+ $ref: '#/components/schemas/ToolParameter'
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ title: ToolDef
+ ToolParameter:
+ type: object
+ properties:
+ name:
+ type: string
+ parameter_type:
+ type: string
+ description:
+ type: string
+ required:
+ type: boolean
+ default: true
+ default:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - name
+ - parameter_type
+ - description
+ - required
+ title: ToolParameter
+ ScoringJob:
+ type: object
+ properties:
+ id:
+ type: string
+ description: The ID of the job.
+ status:
+ type: string
+ enum:
+ - completed
+ - in_progress
+ - failed
+ - scheduled
+ - cancelled
+ description: The status of the job.
+ created_at:
+ type: string
+ format: date-time
+ description: The time the job was created.
+ finished_at:
+ type: string
+ format: date-time
+ description: The time the job finished.
+ error:
+ type: string
+ description: >-
+ If status of the job is failed, this will contain the error message.
+ type:
+ type: string
+ const: scoring
+ default: scoring
+ result_files:
+ type: array
+ items:
+ type: string
+ result_datasets:
+ type: array
+ items:
+ type: string
+ dataset_id:
+ type: string
+ scoring_fn_ids:
+ type: array
+ items:
+ type: string
+ additionalProperties: false
+ required:
+ - id
+ - status
+ - created_at
+ - type
+ - result_files
+ - result_datasets
+ - dataset_id
+ - scoring_fn_ids
+ title: ScoringJob
+ CancelTrainingJobRequest:
+ type: object
+ properties:
+ job_uuid:
+ type: string
+ additionalProperties: false
+ required:
+ - job_uuid
+ title: CancelTrainingJobRequest
ChatCompletionRequest:
type: object
properties:
@@ -3583,142 +3996,6 @@ components:
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
- AgentConfig:
- type: object
- properties:
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- input_shields:
- type: array
- items:
- type: string
- output_shields:
- type: array
- items:
- type: string
- toolgroups:
- type: array
- items:
- $ref: '#/components/schemas/AgentTool'
- client_tools:
- type: array
- items:
- $ref: '#/components/schemas/ToolDef'
- tool_choice:
- type: string
- enum:
- - auto
- - required
- - none
- title: ToolChoice
- description: >-
- Whether tool use is required or automatic. This is a hint to the model
- which may not be followed. It depends on the Instruction Following capabilities
- of the model.
- deprecated: true
- tool_prompt_format:
- type: string
- enum:
- - json
- - function_tag
- - python_list
- title: ToolPromptFormat
- description: >-
- Prompt format for calling custom / zero shot tools.
- deprecated: true
- tool_config:
- $ref: '#/components/schemas/ToolConfig'
- max_infer_iters:
- type: integer
- default: 10
- model:
- type: string
- instructions:
- type: string
- enable_session_persistence:
- type: boolean
- default: false
- response_format:
- $ref: '#/components/schemas/ResponseFormat'
- additionalProperties: false
- required:
- - model
- - instructions
- title: AgentConfig
- AgentTool:
- oneOf:
- - type: string
- - type: object
- properties:
- name:
- type: string
- args:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- - args
- title: AgentToolGroupWithArgs
- ToolDef:
- type: object
- properties:
- name:
- type: string
- description:
- type: string
- parameters:
- type: array
- items:
- $ref: '#/components/schemas/ToolParameter'
- metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- title: ToolDef
- ToolParameter:
- type: object
- properties:
- name:
- type: string
- parameter_type:
- type: string
- description:
- type: string
- required:
- type: boolean
- default: true
- default:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- additionalProperties: false
- required:
- - name
- - parameter_type
- - description
- - required
- title: ToolParameter
CreateAgentRequest:
type: object
properties:
@@ -4412,76 +4689,21 @@ components:
title: EmbeddingsResponse
description: >-
Response containing generated embeddings.
- AgentCandidate:
- type: object
- properties:
- type:
- type: string
- const: agent
- default: agent
- config:
- $ref: '#/components/schemas/AgentConfig'
- description: >-
- The configuration for the agent candidate.
- additionalProperties: false
- required:
- - type
- - config
- title: AgentCandidate
- description: An agent candidate for evaluation.
- EvalCandidate:
- oneOf:
- - $ref: '#/components/schemas/ModelCandidate'
- - $ref: '#/components/schemas/AgentCandidate'
- discriminator:
- propertyName: type
- mapping:
- model: '#/components/schemas/ModelCandidate'
- agent: '#/components/schemas/AgentCandidate'
- ModelCandidate:
- type: object
- properties:
- type:
- type: string
- const: model
- default: model
- model:
- type: string
- description: The model ID to evaluate.
- sampling_params:
- $ref: '#/components/schemas/SamplingParams'
- description: The sampling parameters for the model.
- system_message:
- $ref: '#/components/schemas/SystemMessage'
- description: >-
- (Optional) The system message providing instructions or context to the
- model.
- additionalProperties: false
- required:
- - type
- - model
- - sampling_params
- title: ModelCandidate
- description: A model candidate for evaluation.
EvaluateBenchmarkRequest:
type: object
properties:
+ benchmark_id:
+ type: string
+ description: >-
+ The ID of the benchmark to run the evaluation on.
candidate:
$ref: '#/components/schemas/EvalCandidate'
description: The candidate to evaluate on.
additionalProperties: false
required:
+ - benchmark_id
- candidate
title: EvaluateBenchmarkRequest
- Job:
- type: object
- properties:
- job_id:
- type: string
- additionalProperties: false
- required:
- - job_id
- title: Job
EvaluateRowsRequest:
type: object
properties:
@@ -5660,21 +5882,20 @@ components:
- checkpoints
title: PostTrainingJobArtifactsResponse
description: Artifacts of a finetuning job.
- JobStatus:
- type: string
- enum:
- - completed
- - in_progress
- - failed
- - scheduled
- title: JobStatus
PostTrainingJobStatusResponse:
type: object
properties:
job_uuid:
type: string
status:
- $ref: '#/components/schemas/JobStatus'
+ type: string
+ enum:
+ - completed
+ - in_progress
+ - failed
+ - scheduled
+ - cancelled
+ title: JobStatus
scheduled_at:
type: string
format: date-time
@@ -7073,19 +7294,6 @@ components:
- dataset_id
- scoring_fn_ids
title: ScoreDatasetRequest
- ScoreBatchResponse:
- type: object
- properties:
- dataset_id:
- type: string
- results:
- type: object
- additionalProperties:
- $ref: '#/components/schemas/ScoringResult'
- additionalProperties: false
- required:
- - results
- title: ScoreBatchResponse
AlgorithmConfig:
oneOf:
- $ref: '#/components/schemas/LoraFinetuningConfig'
diff --git a/llama_stack/apis/common/job_types.py b/llama_stack/apis/common/job_types.py
index cad2bcec8..7330fb0cf 100644
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@@ -20,6 +20,13 @@ class JobStatus(Enum):
cancelled = "cancelled"
+class JobType(Enum):
+ batch_inference = "batch_inference"
+ scoring = "scoring"
+ evaluation = "evaluation"
+ post_training = "post_training"
+
+
@json_schema_type
class CommonJobFields(BaseModel):
"""Common fields for all jobs.
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index 552afe0a2..b5b916ad8 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -10,7 +10,7 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated
from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import Job, JobStatus
+from llama_stack.apis.common.job_types import CommonJobFields, JobStatus
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -61,15 +61,32 @@ class EvaluateResponse(BaseModel):
scores: Dict[str, ScoringResult]
+@json_schema_type
+class EvalJob(CommonJobFields):
+ type: Literal["eval"] = "eval"
+ result_files: List[str] = Field(
+ description="The file ids of the eval results.",
+ default_factory=list,
+ )
+ result_datasets: List[str] = Field(
+ description="The ids of the datasets containing the eval results.",
+ default_factory=list,
+ )
+
+    # arguments the job was created with (mirrors evaluate_benchmark's parameters)
+ benchmark_id: str = Field(description="The id of the benchmark to evaluate on.")
+ candidate: EvalCandidate = Field(description="The candidate to evaluate on.")
+
+
class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
- @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
+ @webmethod(route="/eval/jobs", method="POST")
async def evaluate_benchmark(
self,
benchmark_id: str,
candidate: EvalCandidate,
- ) -> Job:
+ ) -> EvalJob:
"""Run an evaluation on a benchmark.
:param benchmark_id: The ID of the benchmark to run the evaluation on.
@@ -85,37 +102,42 @@ class Eval(Protocol):
candidate: EvalCandidate,
) -> EvaluateResponse:
"""Evaluate a list of rows on a candidate.
-
+
:param dataset_rows: The rows to evaluate.
:param scoring_fn_ids: The scoring function ids to use for the evaluation.
:param candidate: The candidate to evaluate on.
:return: EvaluateResponse object containing generations and scores
"""
- @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
- async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]:
- """Get the status of a job.
+ @webmethod(route="/eval/jobs", method="GET")
+ async def list_eval_jobs(self) -> List[EvalJob]:
+ """List all evaluation jobs.
- :param benchmark_id: The ID of the benchmark to run the evaluation on.
- :param job_id: The ID of the job to get the status of.
- :return: The status of the evaluationjob.
+ :return: A list of evaluation jobs.
"""
...
- @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
- async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+ @webmethod(route="/eval/job/{job_id}", method="GET")
+ async def get_eval_job(self, job_id: str) -> Optional[EvalJob]:
+ """Get a job by id.
+
+ :param job_id: The id of the job to get.
+ :return: The job.
+ """
+ ...
+
+ @webmethod(route="/eval/job/{job_id}", method="DELETE")
+ async def delete_eval_job(self, job_id: str) -> Optional[EvalJob]:
+ """Delete a job.
+
+ :param job_id: The id of the job to delete.
+ """
+ ...
+
+ @webmethod(route="/eval/job/{job_id}/cancel", method="POST")
+ async def cancel_eval_job(self, job_id: str) -> Optional[EvalJob]:
"""Cancel a job.
- :param benchmark_id: The ID of the benchmark to run the evaluation on.
- :param job_id: The ID of the job to cancel.
+ :param job_id: The id of the job to cancel.
"""
...
-
- @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
- async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
- """Get the result of a job.
-
- :param benchmark_id: The ID of the benchmark to run the evaluation on.
- :param job_id: The ID of the job to get the result of.
- :return: The result of the job.
- """
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index a67623e22..46184eae9 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -4,10 +4,11 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Any, Dict, List, Optional, Protocol, runtime_checkable
+from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
+from llama_stack.apis.common.job_types import CommonJobFields, JobType
from llama_stack.apis.scoring_functions import ScoringFn
from llama_stack.schema_utils import json_schema_type, webmethod
@@ -47,6 +48,27 @@ class ScoreResponse(BaseModel):
results: Dict[str, ScoringResult]
+@json_schema_type
+class ScoringJob(CommonJobFields):
+ type: Literal["scoring"] = "scoring"
+
+ result_files: List[str] = Field(
+ description="The file ids of the scoring results.",
+ default_factory=list,
+ )
+ result_datasets: List[str] = Field(
+ description="The ids of the datasets containing the scoring results.",
+ default_factory=list,
+ )
+
+    # arguments the job was created with
+ dataset_id: str = Field(description="The id of the dataset used for scoring.")
+ scoring_fn_ids: List[str] = Field(
+ description="The ids of the scoring functions used.",
+ default_factory=list,
+ )
+
+
class ScoringFunctionStore(Protocol):
def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ...
@@ -60,7 +82,7 @@ class Scoring(Protocol):
self,
dataset_id: str,
scoring_fn_ids: List[str],
- ) -> ScoreBatchResponse: ...
+ ) -> ScoringJob: ...
@webmethod(route="/scoring/rows", method="POST")
async def score(
@@ -75,3 +97,36 @@ class Scoring(Protocol):
:return: ScoreResponse object containing rows and aggregated results
"""
...
+
+ @webmethod(route="/scoring/jobs", method="GET")
+ async def list_scoring_jobs(self) -> List[ScoringJob]:
+ """List all scoring jobs.
+
+ :return: A list of scoring jobs.
+ """
+ ...
+
+ @webmethod(route="/scoring/job/{job_id}", method="GET")
+ async def get_scoring_job(self, job_id: str) -> Optional[ScoringJob]:
+ """Get a job by id.
+
+ :param job_id: The id of the job to get.
+ :return: The job.
+ """
+ ...
+
+ @webmethod(route="/scoring/job/{job_id}", method="DELETE")
+ async def delete_scoring_job(self, job_id: str) -> Optional[ScoringJob]:
+ """Delete a job.
+
+ :param job_id: The id of the job to delete.
+ """
+ ...
+
+ @webmethod(route="/scoring/job/{job_id}/cancel", method="POST")
+ async def cancel_scoring_job(self, job_id: str) -> Optional[ScoringJob]:
+ """Cancel a job.
+
+ :param job_id: The id of the job to cancel.
+ """
+ ...