diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index c4430c8d0..48a433495 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -230,6 +230,106 @@ } } }, + "/v1/eval/job/{job_id}/cancel": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/EvalJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "description": "Cancel a job.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, + "/v1/scoring/job/{job_id}/cancel": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Scoring" + ], + "description": "Cancel a job.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to cancel.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/post-training/job/cancel": { "post": { "responses": { @@ -823,6 +923,104 @@ ] } }, + "/v1/eval/job/{job_id}": { + "get": { + "responses": { + "200": { + "description": "The job.", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/EvalJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "description": "Get a job by id.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to get.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/EvalJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "description": "Delete a job.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/files/{bucket}/{key}": { "get": { "responses": { @@ -925,6 +1123,104 @@ ] } }, + "/v1/scoring/job/{job_id}": { + "get": { + "responses": { + "200": { + "description": "The job.", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Scoring" + ], + "description": "Get a job by id.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to get.", + "required": true, + "schema": { + "type": "string" + } + } + ] + }, + "delete": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringJob" + }, + { + "type": "null" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Scoring" + ], + "description": "Delete a job.", + "parameters": [ + { + "name": "job_id", + "in": "path", + "description": "The id of the job to delete.", + "required": true, + "schema": { + "type": "string" + } + } + ] + } + }, "/v1/inference/embeddings": { "post": { "responses": { @@ -968,7 +1264,38 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs": { + "/v1/eval/jobs": { + "get": { + "responses": { + "200": { + "description": "A list of evaluation jobs.", + "content": { + "application/jsonl": { + "schema": { + "$ref": "#/components/schemas/EvalJob" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Eval" + ], + "description": "List all evaluation jobs.", + "parameters": [] + }, "post": { "responses": { "200": { @@ -976,7 +1303,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Job" + "$ref": "#/components/schemas/EvalJob" } } } @@ -998,17 +1325,7 @@ "Eval" ], "description": "Run an evaluation on a benchmark.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - } - ], + "parameters": [], "requestBody": { "content": { "application/json": { @@ -2272,160 +2589,6 @@ } } }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": { - "get": { - "responses": { - "200": { - "description": "The status of the evaluationjob.", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/JobStatus" - }, - { - "type": "null" - } - ] - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the status of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the status of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - }, - "delete": { - "responses": { - "200": { - "description": "OK" - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Cancel a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to cancel.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result": { - "get": { - "responses": { - "200": { - "description": "The result of the job.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Eval" - ], - "description": "Get the result of a job.", - "parameters": [ - { - "name": "benchmark_id", - "in": "path", - "description": "The ID of the benchmark to run the evaluation on.", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "job_id", - "in": "path", - "description": "The ID of the job to get the result of.", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/v1/agents/{agent_id}/sessions": { "get": { "responses": { @@ -2957,6 +3120,80 @@ } } }, + "/v1/scoring/jobs": { + "get": { + "responses": { + "200": { + "description": "A list of scoring jobs.", + "content": { + "application/jsonl": { + "schema": { + "$ref": "#/components/schemas/ScoringJob" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Scoring" + ], + "description": "List all scoring jobs.", + "parameters": [] + }, + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScoringJob" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" + } + }, + "tags": [ + "Scoring" + ], + "description": "", + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ScoreDatasetRequest" + } + } + }, + "required": true + } + } + }, "/v1/shields": { "get": { "responses": { @@ -3663,49 +3900,6 @@ } } }, - "/v1/scoring/jobs": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreBatchResponse" - } - } - } - }, - "400": { - "$ref": "#/components/responses/BadRequest400" - }, - "429": { - "$ref": "#/components/responses/TooManyRequests429" - }, - "500": { - "$ref": "#/components/responses/InternalServerError500" - }, - "default": { - "$ref": "#/components/responses/DefaultError" - } - }, - "tags": [ - "Scoring" - ], - "description": "", - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScoreDatasetRequest" - } - } - }, - "required": true - } - } - }, "/v1/post-training/supervised-fine-tune": { "post": { "responses": { @@ -4768,18 +4962,268 @@ "title": "CompletionResponse", "description": "Response from a completion request." }, - "CancelTrainingJobRequest": { + "AgentCandidate": { "type": "object", "properties": { - "job_uuid": { - "type": "string" + "type": { + "type": "string", + "const": "agent", + "default": "agent" + }, + "config": { + "$ref": "#/components/schemas/AgentConfig", + "description": "The configuration for the agent candidate." } }, "additionalProperties": false, "required": [ - "job_uuid" + "type", + "config" ], - "title": "CancelTrainingJobRequest" + "title": "AgentCandidate", + "description": "An agent candidate for evaluation." + }, + "AgentConfig": { + "type": "object", + "properties": { + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "input_shields": { + "type": "array", + "items": { + "type": "string" + } + }, + "output_shields": { + "type": "array", + "items": { + "type": "string" + } + }, + "toolgroups": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgentTool" + } + }, + "client_tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + } + }, + "tool_choice": { + "type": "string", + "enum": [ + "auto", + "required", + "none" + ], + "title": "ToolChoice", + "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.", + "deprecated": true + }, + "tool_prompt_format": { + "type": "string", + "enum": [ + "json", + "function_tag", + "python_list" + ], + "title": "ToolPromptFormat", + "description": "Prompt format for calling custom / zero shot tools.", + "deprecated": true + }, + "tool_config": { + "$ref": "#/components/schemas/ToolConfig" + }, + "max_infer_iters": { + "type": "integer", + "default": 10 + }, + "model": { + "type": "string" + }, + "instructions": { + "type": "string" + }, + "enable_session_persistence": { + "type": "boolean", + "default": false + }, + "response_format": { + "$ref": "#/components/schemas/ResponseFormat" + } + }, + "additionalProperties": false, + "required": [ + "model", + "instructions" + ], + "title": "AgentConfig" + }, + "AgentTool": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "name", + "args" + ], + "title": "AgentToolGroupWithArgs" + } + ] + }, + "EvalCandidate": { + "oneOf": [ + { + "$ref": "#/components/schemas/ModelCandidate" + }, + { + "$ref": "#/components/schemas/AgentCandidate" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "model": "#/components/schemas/ModelCandidate", + "agent": "#/components/schemas/AgentCandidate" + } + } + }, + "EvalJob": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the job." + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "The status of the job." + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "The time the job was created." + }, + "finished_at": { + "type": "string", + "format": "date-time", + "description": "The time the job finished." + }, + "error": { + "type": "string", + "description": "If status of the job is failed, this will contain the error message." + }, + "type": { + "type": "string", + "const": "eval", + "default": "eval" + }, + "result_files": { + "type": "array", + "items": { + "type": "string" + } + }, + "result_datasets": { + "type": "array", + "items": { + "type": "string" + } + }, + "benchmark_id": { + "type": "string" + }, + "candidate": { + "$ref": "#/components/schemas/EvalCandidate" + } + }, + "additionalProperties": false, + "required": [ + "id", + "status", + "created_at", + "type", + "result_files", + "result_datasets", + "benchmark_id", + "candidate" + ], + "title": "EvalJob" + }, + "ModelCandidate": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "model", + "default": "model" + }, + "model": { + "type": "string", + "description": "The model ID to evaluate." + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams", + "description": "The sampling parameters for the model." + }, + "system_message": { + "$ref": "#/components/schemas/SystemMessage", + "description": "(Optional) The system message providing instructions or context to the model." + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "sampling_params" + ], + "title": "ModelCandidate", + "description": "A model candidate for evaluation." }, "ToolConfig": { "type": "object", @@ -4826,6 +5270,186 @@ "title": "ToolConfig", "description": "Configuration for tool use." }, + "ToolDef": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + } + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "name" + ], + "title": "ToolDef" + }, + "ToolParameter": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "parameter_type": { + "type": "string" + }, + "description": { + "type": "string" + }, + "required": { + "type": "boolean", + "default": true + }, + "default": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "name", + "parameter_type", + "description", + "required" + ], + "title": "ToolParameter" + }, + "ScoringJob": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the job." + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "The status of the job." + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "The time the job was created." + }, + "finished_at": { + "type": "string", + "format": "date-time", + "description": "The time the job finished." + }, + "error": { + "type": "string", + "description": "If status of the job is failed, this will contain the error message." + }, + "type": { + "type": "string", + "const": "scoring", + "default": "scoring" + }, + "result_files": { + "type": "array", + "items": { + "type": "string" + } + }, + "result_datasets": { + "type": "array", + "items": { + "type": "string" + } + }, + "dataset_id": { + "type": "string" + }, + "scoring_fn_ids": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "status", + "created_at", + "type", + "result_files", + "result_datasets", + "dataset_id", + "scoring_fn_ids" + ], + "title": "ScoringJob" + }, + "CancelTrainingJobRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "CancelTrainingJobRequest" + }, "ChatCompletionRequest": { "type": "object", "properties": { @@ -5140,227 +5764,6 @@ "title": "CompletionResponseStreamChunk", "description": "A chunk of a streamed completion response." }, - "AgentConfig": { - "type": "object", - "properties": { - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "input_shields": { - "type": "array", - "items": { - "type": "string" - } - }, - "output_shields": { - "type": "array", - "items": { - "type": "string" - } - }, - "toolgroups": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgentTool" - } - }, - "client_tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDef" - } - }, - "tool_choice": { - "type": "string", - "enum": [ - "auto", - "required", - "none" - ], - "title": "ToolChoice", - "description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model.", - "deprecated": true - }, - "tool_prompt_format": { - "type": "string", - "enum": [ - "json", - "function_tag", - "python_list" - ], - "title": "ToolPromptFormat", - "description": "Prompt format for calling custom / zero shot tools.", - "deprecated": true - }, - "tool_config": { - "$ref": "#/components/schemas/ToolConfig" - }, - "max_infer_iters": { - "type": "integer", - "default": 10 - }, - "model": { - "type": "string" - }, - "instructions": { - "type": "string" - }, - "enable_session_persistence": { - "type": "boolean", - "default": false - }, - "response_format": { - "$ref": "#/components/schemas/ResponseFormat" - } - }, - "additionalProperties": false, - "required": [ - "model", - "instructions" - ], - "title": "AgentConfig" - }, - "AgentTool": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "name", - "args" - ], - "title": "AgentToolGroupWithArgs" - } - ] - }, - "ToolDef": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolParameter" - } - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "name" - ], - "title": "ToolDef" - }, - "ToolParameter": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "parameter_type": { - "type": "string" - }, - "description": { - "type": "string" - }, - "required": { - "type": "boolean", - "default": true - }, - "default": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "name", - "parameter_type", - "description", - "required" - ], - "title": "ToolParameter" - }, "CreateAgentRequest": { "type": "object", "properties": { @@ -6335,77 +6738,13 @@ "title": "EmbeddingsResponse", "description": "Response containing generated embeddings." }, - "AgentCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent", - "default": "agent" - }, - "config": { - "$ref": "#/components/schemas/AgentConfig", - "description": "The configuration for the agent candidate." - } - }, - "additionalProperties": false, - "required": [ - "type", - "config" - ], - "title": "AgentCandidate", - "description": "An agent candidate for evaluation." - }, - "EvalCandidate": { - "oneOf": [ - { - "$ref": "#/components/schemas/ModelCandidate" - }, - { - "$ref": "#/components/schemas/AgentCandidate" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "model": "#/components/schemas/ModelCandidate", - "agent": "#/components/schemas/AgentCandidate" - } - } - }, - "ModelCandidate": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "model", - "default": "model" - }, - "model": { - "type": "string", - "description": "The model ID to evaluate." - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams", - "description": "The sampling parameters for the model." - }, - "system_message": { - "$ref": "#/components/schemas/SystemMessage", - "description": "(Optional) The system message providing instructions or context to the model." - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "sampling_params" - ], - "title": "ModelCandidate", - "description": "A model candidate for evaluation." - }, "EvaluateBenchmarkRequest": { "type": "object", "properties": { + "benchmark_id": { + "type": "string", + "description": "The ID of the benchmark to run the evaluation on." + }, "candidate": { "$ref": "#/components/schemas/EvalCandidate", "description": "The candidate to evaluate on." @@ -6413,23 +6752,11 @@ }, "additionalProperties": false, "required": [ + "benchmark_id", "candidate" ], "title": "EvaluateBenchmarkRequest" }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_id" - ], - "title": "Job" - }, "EvaluateRowsRequest": { "type": "object", "properties": { @@ -8163,16 +8490,6 @@ "title": "PostTrainingJobArtifactsResponse", "description": "Artifacts of a finetuning job." }, - "JobStatus": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled" - ], - "title": "JobStatus" - }, "PostTrainingJobStatusResponse": { "type": "object", "properties": { @@ -8180,7 +8497,15 @@ "type": "string" }, "status": { - "$ref": "#/components/schemas/JobStatus" + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "title": "JobStatus" }, "scheduled_at": { "type": "string", @@ -10322,25 +10647,6 @@ ], "title": "ScoreDatasetRequest" }, - "ScoreBatchResponse": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string" - }, - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - } - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreBatchResponse" - }, "AlgorithmConfig": { "oneOf": [ { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index de24e41c6..45058fbdc 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -142,6 +142,68 @@ paths: schema: $ref: '#/components/schemas/BatchCompletionRequest' required: true + /v1/eval/job/{job_id}/cancel: + post: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/EvalJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + description: Cancel a job. + parameters: + - name: job_id + in: path + description: The id of the job to cancel. + required: true + schema: + type: string + /v1/scoring/job/{job_id}/cancel: + post: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ScoringJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + description: Cancel a job. + parameters: + - name: job_id + in: path + description: The id of the job to cancel. + required: true + schema: + type: string /v1/post-training/job/cancel: post: responses: @@ -560,6 +622,67 @@ paths: required: true schema: type: string + /v1/eval/job/{job_id}: + get: + responses: + '200': + description: The job. + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/EvalJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + description: Get a job by id. + parameters: + - name: job_id + in: path + description: The id of the job to get. + required: true + schema: + type: string + delete: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/EvalJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + description: Delete a job. + parameters: + - name: job_id + in: path + description: The id of the job to delete. + required: true + schema: + type: string /v1/files/{bucket}/{key}: get: responses: @@ -633,6 +756,67 @@ paths: required: true schema: type: string + /v1/scoring/job/{job_id}: + get: + responses: + '200': + description: The job. + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ScoringJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + description: Get a job by id. + parameters: + - name: job_id + in: path + description: The id of the job to get. + required: true + schema: + type: string + delete: + responses: + '200': + description: OK + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ScoringJob' + - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + description: Delete a job. + parameters: + - name: job_id + in: path + description: The id of the job to delete. + required: true + schema: + type: string /v1/inference/embeddings: post: responses: @@ -666,7 +850,29 @@ paths: schema: $ref: '#/components/schemas/EmbeddingsRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/jobs: + /v1/eval/jobs: + get: + responses: + '200': + description: A list of evaluation jobs. + content: + application/jsonl: + schema: + $ref: '#/components/schemas/EvalJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Eval + description: List all evaluation jobs. + parameters: [] post: responses: '200': @@ -675,7 +881,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Job' + $ref: '#/components/schemas/EvalJob' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -689,14 +895,7 @@ paths: tags: - Eval description: Run an evaluation on a benchmark. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string + parameters: [] requestBody: content: application/json: @@ -1529,111 +1728,6 @@ paths: schema: $ref: '#/components/schemas/InvokeToolRequest' required: true - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}: - get: - responses: - '200': - description: The status of the evaluationjob. - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/JobStatus' - - type: 'null' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the status of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the status of. - required: true - schema: - type: string - delete: - responses: - '200': - description: OK - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Cancel a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to cancel. - required: true - schema: - type: string - /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result: - get: - responses: - '200': - description: The result of the job. - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Eval - description: Get the result of a job. - parameters: - - name: benchmark_id - in: path - description: >- - The ID of the benchmark to run the evaluation on. - required: true - schema: - type: string - - name: job_id - in: path - description: The ID of the job to get the result of. - required: true - schema: - type: string /v1/agents/{agent_id}/sessions: get: responses: @@ -2002,6 +2096,57 @@ paths: schema: $ref: '#/components/schemas/RegisterScoringFunctionRequest' required: true + /v1/scoring/jobs: + get: + responses: + '200': + description: A list of scoring jobs. + content: + application/jsonl: + schema: + $ref: '#/components/schemas/ScoringJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + description: List all scoring jobs. + parameters: [] + post: + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ScoringJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' + tags: + - Scoring + description: '' + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ScoreDatasetRequest' + required: true /v1/shields: get: responses: @@ -2491,35 +2636,6 @@ paths: schema: $ref: '#/components/schemas/ScoreRequest' required: true - /v1/scoring/jobs: - post: - responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreBatchResponse' - '400': - $ref: '#/components/responses/BadRequest400' - '429': - $ref: >- - #/components/responses/TooManyRequests429 - '500': - $ref: >- - #/components/responses/InternalServerError500 - default: - $ref: '#/components/responses/DefaultError' - tags: - - Scoring - description: '' - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ScoreDatasetRequest' - required: true /v1/post-training/supervised-fine-tune: post: responses: @@ -3259,15 +3375,195 @@ components: - stop_reason title: CompletionResponse description: Response from a completion request. - CancelTrainingJobRequest: + AgentCandidate: type: object properties: - job_uuid: + type: type: string + const: agent + default: agent + config: + $ref: '#/components/schemas/AgentConfig' + description: >- + The configuration for the agent candidate. additionalProperties: false required: - - job_uuid - title: CancelTrainingJobRequest + - type + - config + title: AgentCandidate + description: An agent candidate for evaluation. + AgentConfig: + type: object + properties: + sampling_params: + $ref: '#/components/schemas/SamplingParams' + input_shields: + type: array + items: + type: string + output_shields: + type: array + items: + type: string + toolgroups: + type: array + items: + $ref: '#/components/schemas/AgentTool' + client_tools: + type: array + items: + $ref: '#/components/schemas/ToolDef' + tool_choice: + type: string + enum: + - auto + - required + - none + title: ToolChoice + description: >- + Whether tool use is required or automatic. This is a hint to the model + which may not be followed. It depends on the Instruction Following capabilities + of the model. + deprecated: true + tool_prompt_format: + type: string + enum: + - json + - function_tag + - python_list + title: ToolPromptFormat + description: >- + Prompt format for calling custom / zero shot tools. + deprecated: true + tool_config: + $ref: '#/components/schemas/ToolConfig' + max_infer_iters: + type: integer + default: 10 + model: + type: string + instructions: + type: string + enable_session_persistence: + type: boolean + default: false + response_format: + $ref: '#/components/schemas/ResponseFormat' + additionalProperties: false + required: + - model + - instructions + title: AgentConfig + AgentTool: + oneOf: + - type: string + - type: object + properties: + name: + type: string + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - args + title: AgentToolGroupWithArgs + EvalCandidate: + oneOf: + - $ref: '#/components/schemas/ModelCandidate' + - $ref: '#/components/schemas/AgentCandidate' + discriminator: + propertyName: type + mapping: + model: '#/components/schemas/ModelCandidate' + agent: '#/components/schemas/AgentCandidate' + EvalJob: + type: object + properties: + id: + type: string + description: The ID of the job. + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: The status of the job. + created_at: + type: string + format: date-time + description: The time the job was created. + finished_at: + type: string + format: date-time + description: The time the job finished. + error: + type: string + description: >- + If status of the job is failed, this will contain the error message. + type: + type: string + const: eval + default: eval + result_files: + type: array + items: + type: string + result_datasets: + type: array + items: + type: string + benchmark_id: + type: string + candidate: + $ref: '#/components/schemas/EvalCandidate' + additionalProperties: false + required: + - id + - status + - created_at + - type + - result_files + - result_datasets + - benchmark_id + - candidate + title: EvalJob + ModelCandidate: + type: object + properties: + type: + type: string + const: model + default: model + model: + type: string + description: The model ID to evaluate. + sampling_params: + $ref: '#/components/schemas/SamplingParams' + description: The sampling parameters for the model. + system_message: + $ref: '#/components/schemas/SystemMessage' + description: >- + (Optional) The system message providing instructions or context to the + model. + additionalProperties: false + required: + - type + - model + - sampling_params + title: ModelCandidate + description: A model candidate for evaluation. ToolConfig: type: object properties: @@ -3316,6 +3612,123 @@ components: additionalProperties: false title: ToolConfig description: Configuration for tool use. + ToolDef: + type: object + properties: + name: + type: string + description: + type: string + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + title: ToolDef + ToolParameter: + type: object + properties: + name: + type: string + parameter_type: + type: string + description: + type: string + required: + type: boolean + default: true + default: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - name + - parameter_type + - description + - required + title: ToolParameter + ScoringJob: + type: object + properties: + id: + type: string + description: The ID of the job. + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: The status of the job. + created_at: + type: string + format: date-time + description: The time the job was created. + finished_at: + type: string + format: date-time + description: The time the job finished. + error: + type: string + description: >- + If status of the job is failed, this will contain the error message. + type: + type: string + const: scoring + default: scoring + result_files: + type: array + items: + type: string + result_datasets: + type: array + items: + type: string + dataset_id: + type: string + scoring_fn_ids: + type: array + items: + type: string + additionalProperties: false + required: + - id + - status + - created_at + - type + - result_files + - result_datasets + - dataset_id + - scoring_fn_ids + title: ScoringJob + CancelTrainingJobRequest: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: CancelTrainingJobRequest ChatCompletionRequest: type: object properties: @@ -3583,142 +3996,6 @@ components: title: CompletionResponseStreamChunk description: >- A chunk of a streamed completion response. - AgentConfig: - type: object - properties: - sampling_params: - $ref: '#/components/schemas/SamplingParams' - input_shields: - type: array - items: - type: string - output_shields: - type: array - items: - type: string - toolgroups: - type: array - items: - $ref: '#/components/schemas/AgentTool' - client_tools: - type: array - items: - $ref: '#/components/schemas/ToolDef' - tool_choice: - type: string - enum: - - auto - - required - - none - title: ToolChoice - description: >- - Whether tool use is required or automatic. This is a hint to the model - which may not be followed. It depends on the Instruction Following capabilities - of the model. - deprecated: true - tool_prompt_format: - type: string - enum: - - json - - function_tag - - python_list - title: ToolPromptFormat - description: >- - Prompt format for calling custom / zero shot tools. - deprecated: true - tool_config: - $ref: '#/components/schemas/ToolConfig' - max_infer_iters: - type: integer - default: 10 - model: - type: string - instructions: - type: string - enable_session_persistence: - type: boolean - default: false - response_format: - $ref: '#/components/schemas/ResponseFormat' - additionalProperties: false - required: - - model - - instructions - title: AgentConfig - AgentTool: - oneOf: - - type: string - - type: object - properties: - name: - type: string - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - args - title: AgentToolGroupWithArgs - ToolDef: - type: object - properties: - name: - type: string - description: - type: string - parameters: - type: array - items: - $ref: '#/components/schemas/ToolParameter' - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - title: ToolDef - ToolParameter: - type: object - properties: - name: - type: string - parameter_type: - type: string - description: - type: string - required: - type: boolean - default: true - default: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - additionalProperties: false - required: - - name - - parameter_type - - description - - required - title: ToolParameter CreateAgentRequest: type: object properties: @@ -4412,76 +4689,21 @@ components: title: EmbeddingsResponse description: >- Response containing generated embeddings. - AgentCandidate: - type: object - properties: - type: - type: string - const: agent - default: agent - config: - $ref: '#/components/schemas/AgentConfig' - description: >- - The configuration for the agent candidate. - additionalProperties: false - required: - - type - - config - title: AgentCandidate - description: An agent candidate for evaluation. - EvalCandidate: - oneOf: - - $ref: '#/components/schemas/ModelCandidate' - - $ref: '#/components/schemas/AgentCandidate' - discriminator: - propertyName: type - mapping: - model: '#/components/schemas/ModelCandidate' - agent: '#/components/schemas/AgentCandidate' - ModelCandidate: - type: object - properties: - type: - type: string - const: model - default: model - model: - type: string - description: The model ID to evaluate. - sampling_params: - $ref: '#/components/schemas/SamplingParams' - description: The sampling parameters for the model. - system_message: - $ref: '#/components/schemas/SystemMessage' - description: >- - (Optional) The system message providing instructions or context to the - model. - additionalProperties: false - required: - - type - - model - - sampling_params - title: ModelCandidate - description: A model candidate for evaluation. EvaluateBenchmarkRequest: type: object properties: + benchmark_id: + type: string + description: >- + The ID of the benchmark to run the evaluation on. candidate: $ref: '#/components/schemas/EvalCandidate' description: The candidate to evaluate on. additionalProperties: false required: + - benchmark_id - candidate title: EvaluateBenchmarkRequest - Job: - type: object - properties: - job_id: - type: string - additionalProperties: false - required: - - job_id - title: Job EvaluateRowsRequest: type: object properties: @@ -5660,21 +5882,20 @@ components: - checkpoints title: PostTrainingJobArtifactsResponse description: Artifacts of a finetuning job. - JobStatus: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - title: JobStatus PostTrainingJobStatusResponse: type: object properties: job_uuid: type: string status: - $ref: '#/components/schemas/JobStatus' + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + title: JobStatus scheduled_at: type: string format: date-time @@ -7073,19 +7294,6 @@ components: - dataset_id - scoring_fn_ids title: ScoreDatasetRequest - ScoreBatchResponse: - type: object - properties: - dataset_id: - type: string - results: - type: object - additionalProperties: - $ref: '#/components/schemas/ScoringResult' - additionalProperties: false - required: - - results - title: ScoreBatchResponse AlgorithmConfig: oneOf: - $ref: '#/components/schemas/LoraFinetuningConfig' diff --git a/llama_stack/apis/common/job_types.py b/llama_stack/apis/common/job_types.py index cad2bcec8..7330fb0cf 100644 --- a/llama_stack/apis/common/job_types.py +++ b/llama_stack/apis/common/job_types.py @@ -20,6 +20,13 @@ class JobStatus(Enum): cancelled = "cancelled" +class JobType(Enum): + batch_inference = "batch_inference" + scoring = "scoring" + evaluation = "evaluation" + post_training = "post_training" + + @json_schema_type class CommonJobFields(BaseModel): """Common fields for all jobs. diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index 552afe0a2..b5b916ad8 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated from llama_stack.apis.agents import AgentConfig -from llama_stack.apis.common.job_types import Job, JobStatus +from llama_stack.apis.common.job_types import CommonJobFields, JobStatus from llama_stack.apis.inference import SamplingParams, SystemMessage from llama_stack.apis.scoring import ScoringResult from llama_stack.schema_utils import json_schema_type, register_schema, webmethod @@ -61,15 +61,32 @@ class EvaluateResponse(BaseModel): scores: Dict[str, ScoringResult] +@json_schema_type +class EvalJob(CommonJobFields): + type: Literal["eval"] = "eval" + result_files: List[str] = Field( + description="The file ids of the eval results.", + default_factory=list, + ) + result_datasets: List[str] = Field( + description="The ids of the datasets containing the eval results.", + default_factory=list, + ) + + # how the job is created + benchmark_id: str = Field(description="The id of the benchmark to evaluate on.") + candidate: EvalCandidate = Field(description="The candidate to evaluate on.") + + class Eval(Protocol): """Llama Stack Evaluation API for running evaluations on model and agent candidates.""" - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST") + @webmethod(route="/eval/jobs", method="POST") async def evaluate_benchmark( self, benchmark_id: str, candidate: EvalCandidate, - ) -> Job: + ) -> EvalJob: """Run an evaluation on a benchmark. :param benchmark_id: The ID of the benchmark to run the evaluation on. @@ -85,37 +102,42 @@ class Eval(Protocol): candidate: EvalCandidate, ) -> EvaluateResponse: """Evaluate a list of rows on a candidate. - + :param dataset_rows: The rows to evaluate. :param scoring_fn_ids: The scoring function ids to use for the evaluation. :param candidate: The candidate to evaluate on. :return: EvaluateResponse object containing generations and scores """ - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") - async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: - """Get the status of a job. + @webmethod(route="/eval/jobs", method="GET") + async def list_eval_jobs(self) -> List[EvalJob]: + """List all evaluation jobs. - :param benchmark_id: The ID of the benchmark to run the evaluation on. - :param job_id: The ID of the job to get the status of. - :return: The status of the evaluationjob. + :return: A list of evaluation jobs. """ ... - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE") - async def job_cancel(self, benchmark_id: str, job_id: str) -> None: + @webmethod(route="/eval/job/{job_id}", method="GET") + async def get_eval_job(self, job_id: str) -> Optional[EvalJob]: + """Get a job by id. + + :param job_id: The id of the job to get. + :return: The job. + """ + ... + + @webmethod(route="/eval/job/{job_id}", method="DELETE") + async def delete_eval_job(self, job_id: str) -> Optional[EvalJob]: + """Delete a job. + + :param job_id: The id of the job to delete. + """ + ... + + @webmethod(route="/eval/job/{job_id}/cancel", method="POST") + async def cancel_eval_job(self, job_id: str) -> Optional[EvalJob]: """Cancel a job. - :param benchmark_id: The ID of the benchmark to run the evaluation on. - :param job_id: The ID of the job to cancel. + :param job_id: The id of the job to cancel. """ ... - - @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET") - async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse: - """Get the result of a job. - - :param benchmark_id: The ID of the benchmark to run the evaluation on. - :param job_id: The ID of the job to get the result of. - :return: The result of the job. - """ diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py index a67623e22..46184eae9 100644 --- a/llama_stack/apis/scoring/scoring.py +++ b/llama_stack/apis/scoring/scoring.py @@ -4,10 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, Dict, List, Optional, Protocol, runtime_checkable +from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkable -from pydantic import BaseModel +from pydantic import BaseModel, Field +from llama_stack.apis.common.job_types import CommonJobFields, JobType from llama_stack.apis.scoring_functions import ScoringFn from llama_stack.schema_utils import json_schema_type, webmethod @@ -47,6 +48,27 @@ class ScoreResponse(BaseModel): results: Dict[str, ScoringResult] +@json_schema_type +class ScoringJob(CommonJobFields): + type: Literal["scoring"] = "scoring" + + result_files: List[str] = Field( + description="The file ids of the scoring results.", + default_factory=list, + ) + result_datasets: List[str] = Field( + description="The ids of the datasets containing the scoring results.", + default_factory=list, + ) + + # how the job is created + dataset_id: str = Field(description="The id of the dataset used for scoring.") + scoring_fn_ids: List[str] = Field( + description="The ids of the scoring functions used.", + default_factory=list, + ) + + class ScoringFunctionStore(Protocol): def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: ... @@ -60,7 +82,7 @@ class Scoring(Protocol): self, dataset_id: str, scoring_fn_ids: List[str], - ) -> ScoreBatchResponse: ... + ) -> ScoringJob: ... @webmethod(route="/scoring/rows", method="POST") async def score( @@ -75,3 +97,36 @@ class Scoring(Protocol): :return: ScoreResponse object containing rows and aggregated results """ ... + + @webmethod(route="/scoring/jobs", method="GET") + async def list_scoring_jobs(self) -> List[ScoringJob]: + """List all scoring jobs. + + :return: A list of scoring jobs. + """ + ... + + @webmethod(route="/scoring/job/{job_id}", method="GET") + async def get_scoring_job(self, job_id: str) -> Optional[ScoringJob]: + """Get a job by id. + + :param job_id: The id of the job to get. + :return: The job. + """ + ... + + @webmethod(route="/scoring/job/{job_id}", method="DELETE") + async def delete_scoring_job(self, job_id: str) -> Optional[ScoringJob]: + """Delete a job. + + :param job_id: The id of the job to delete. + """ + ... + + @webmethod(route="/scoring/job/{job_id}/cancel", method="POST") + async def cancel_scoring_job(self, job_id: str) -> Optional[ScoringJob]: + """Cancel a job. + + :param job_id: The id of the job to cancel. + """ + ...