From d2b62157a3ef7aa6461e5c1857924578890355d9 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 15 Oct 2024 00:44:54 -0700 Subject: [PATCH] openapi gen --- docs/openapi_generator/generate.py | 4 +- docs/resources/llama-stack-spec.html | 1255 ++++++++++------- docs/resources/llama-stack-spec.yaml | 622 ++++---- llama_stack/apis/datasets/datasets.py | 20 + .../apis/post_training/post_training.py | 2 +- 5 files changed, 1126 insertions(+), 777 deletions(-) diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index 871c01a80..994b06e58 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -33,7 +33,7 @@ schema_utils.json_schema_type = json_schema_type from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.agents import * # noqa: F403 -from llama_stack.apis.dataset import * # noqa: F403 +from llama_stack.apis.datasets import * # noqa: F403 from llama_stack.apis.evals import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 from llama_stack.apis.batch_inference import * # noqa: F403 @@ -61,7 +61,7 @@ class LlamaStack( Telemetry, PostTraining, Memory, - Evaluations, + Evals, Models, Shields, Inspect, diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 96ef7e4bb..ac75dbf04 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-09 21:10:09.073430" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-15 00:44:26.278642" }, "servers": [ { @@ -109,39 +109,6 @@ } } }, - "/evaluate/job/cancel": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CancelEvaluationJobRequest" - } - } - }, - "required": true - } - } - }, "/post_training/job/cancel": { "post": { "responses": { @@ -355,7 +322,7 @@ "200": { "description": "OK", "content": { - "application/json": { + "text/event-stream": { "schema": { "$ref": "#/components/schemas/AgentTurnResponseStreamChunk" } @@ -393,7 +360,14 @@ "post": { "responses": { "200": { - "description": "OK" + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreateDatasetResponse" + } + } + } } }, "tags": [ @@ -492,7 +466,14 @@ "post": { "responses": { "200": { - "description": "OK" + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DeleteDatasetResponse" + } + } + } } }, "tags": [ @@ -561,126 +542,6 @@ } } }, - "/evaluate/question_answering/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest" - } - } - }, - "required": true - } - } - }, - "/evaluate/summarization/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateSummarizationRequest" - } - } - }, - "required": true - } - } - }, - "/evaluate/text_generation/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateTextGenerationRequest" - } - } - }, - "required": true - } - } - }, "/agents/session/get": { "post": { "responses": { @@ -845,7 +706,21 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/TrainEvalDataset" + "oneOf": [ + { + "oneOf": [ + { + "$ref": "#/components/schemas/HuggingfaceDatasetDef" + }, + { + "$ref": "#/components/schemas/CustomDatasetDef" + } + ] + }, + { + "type": "null" + } + ] } } } @@ -856,7 +731,7 @@ ], "parameters": [ { - "name": "dataset_uuid", + "name": "dataset_identifier", "in": "query", "required": true, "schema": { @@ -875,150 +750,6 @@ ] } }, - "/evaluate/job/artifacts": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJobArtifactsResponse" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "job_uuid", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, - "/evaluate/job/logs": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJobLogStream" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "job_uuid", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, - "/evaluate/job/status": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJobStatusResponse" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "job_uuid", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, - "/evaluate/jobs": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, "/memory_banks/get": { "get": { "responses": { @@ -1412,6 +1143,43 @@ } } }, + "/datasets/list": { + "get": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/jsonl": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/HuggingfaceDatasetDef" + }, + { + "$ref": "#/components/schemas/CustomDatasetDef" + } + ] + } + } + } + } + }, + "tags": [ + "Datasets" + ], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ] + } + }, "/memory_banks/list": { "get": { "responses": { @@ -1836,6 +1604,86 @@ } } }, + "/evals/run_eval_task": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateResponse" + } + } + } + } + }, + "tags": [ + "Evals" + ], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunEvalTaskRequest" + } + } + }, + "required": true + } + } + }, + "/evals/run_scorer": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateResponse" + } + } + } + } + }, + "tags": [ + "Evals" + ], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RunScorerRequest" + } + } + }, + "required": true + } + } + }, "/safety/run_shield": { "post": { "responses": { @@ -2571,18 +2419,6 @@ "completion_message_batch" ] }, - "CancelEvaluationJobRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ] - }, "CancelTrainingJobRequest": { "type": "object", "properties": { @@ -4090,19 +3926,58 @@ "error" ] }, - "TrainEvalDataset": { + "CustomDatasetDef": { "type": "object", "properties": { - "columns": { + "type": { + "type": "string", + "const": "custom", + "default": "custom" + }, + "identifier": { + "type": "string" + }, + "url": { + "type": "string" + }, + "rename_columns_map": { "type": "object", "additionalProperties": { - "$ref": "#/components/schemas/TrainEvalDatasetColumnType" + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "type", + "identifier", + "url" + ] + }, + "HuggingfaceDatasetDef": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "huggingface", + "default": "huggingface" + }, + "identifier": { + "type": "string" + }, + "dataset_path": { + "type": "string" + }, + "dataset_name": { + "type": "string" + }, + "rename_columns_map": { + "type": "object", + "additionalProperties": { + "type": "string" } }, - "content_url": { - "$ref": "#/components/schemas/URL" - }, - "metadata": { + "kwargs": { "type": "object", "additionalProperties": { "oneOf": [ @@ -4130,35 +4005,48 @@ }, "additionalProperties": false, "required": [ - "columns", - "content_url" - ], - "title": "Dataset to be used for training or evaluating language models." - }, - "TrainEvalDatasetColumnType": { - "type": "string", - "enum": [ - "dialog", - "text", - "media", - "number", - "json" + "type", + "identifier", + "dataset_path", + "kwargs" ] }, "CreateDatasetRequest": { "type": "object", "properties": { - "uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" + "dataset_def": { + "oneOf": [ + { + "$ref": "#/components/schemas/HuggingfaceDatasetDef" + }, + { + "$ref": "#/components/schemas/CustomDatasetDef" + } + ] } }, "additionalProperties": false, "required": [ - "uuid", - "dataset" + "dataset_def" + ] + }, + "CreateDatasetResponse": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "success", + "fail" + ] + }, + "msg": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "status" ] }, "DeleteAgentsRequest": { @@ -4192,13 +4080,32 @@ "DeleteDatasetRequest": { "type": "object", "properties": { - "dataset_uuid": { + "dataset_identifier": { "type": "string" } }, "additionalProperties": false, "required": [ - "dataset_uuid" + "dataset_identifier" + ] + }, + "DeleteDatasetResponse": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "success", + "fail" + ] + }, + "msg": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "status" ] }, "EmbeddingsRequest": { @@ -4258,76 +4165,6 @@ "embeddings" ] }, - "EvaluateQuestionAnsweringRequest": { - "type": "object", - "properties": { - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "em", - "f1" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "metrics" - ] - }, - "EvaluationJob": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ] - }, - "EvaluateSummarizationRequest": { - "type": "object", - "properties": { - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "rouge", - "bleu" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "metrics" - ] - }, - "EvaluateTextGenerationRequest": { - "type": "object", - "properties": { - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "perplexity", - "rouge", - "bleu" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "metrics" - ] - }, "GetAgentsSessionRequest": { "type": "object", "properties": { @@ -4513,43 +4350,6 @@ "step" ] }, - "EvaluationJobArtifactsResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ], - "title": "Artifacts of a evaluation job." - }, - "EvaluationJobLogStream": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ] - }, - "EvaluationJobStatusResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ] - }, "ModelDefWithProvider": { "type": "object", "properties": { @@ -5265,6 +5065,61 @@ "dpo" ] }, + "TrainEvalDataset": { + "type": "object", + "properties": { + "columns": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/TrainEvalDatasetColumnType" + } + }, + "content_url": { + "$ref": "#/components/schemas/URL" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "columns", + "content_url" + ], + "title": "Dataset to be used for training or evaluating language models." + }, + "TrainEvalDatasetColumnType": { + "type": "string", + "enum": [ + "dialog", + "text", + "media", + "number", + "json" + ] + }, "TrainingConfig": { "type": "object", "properties": { @@ -5709,6 +5564,314 @@ "score" ] }, + "EvaluateDatasetConfig": { + "type": "object", + "properties": { + "dataset_identifier": { + "type": "string" + }, + "row_limit": { + "type": "integer" + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "dataset_identifier" + ] + }, + "EvaluateJudgeScoringConfig": { + "type": "object" + }, + "EvaluateModelGenerationConfig": { + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "model", + "sampling_params" + ] + }, + "EvaluatePostprocessConfig": { + "type": "object", + "properties": { + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false + }, + "EvaluatePreprocessConfig": { + "type": "object", + "properties": { + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false + }, + "EvaluateProcessorConfig": { + "type": "object", + "properties": { + "processor_identifier": { + "type": "string" + }, + "preprocess_config": { + "$ref": "#/components/schemas/EvaluatePreprocessConfig" + }, + "postprocess_config": { + "$ref": "#/components/schemas/EvaluatePostprocessConfig" + } + }, + "additionalProperties": false, + "required": [ + "processor_identifier" + ] + }, + "EvaluateScoringConfig": { + "type": "object", + "properties": { + "scorer_config_list": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EvaluateSingleScorerConfig" + } + } + }, + "additionalProperties": false, + "required": [ + "scorer_config_list" + ] + }, + "EvaluateSingleScorerConfig": { + "type": "object", + "properties": { + "scorer_name": { + "type": "string" + }, + "llm_judge_config": { + "$ref": "#/components/schemas/LLMJudgeConfig" + } + }, + "additionalProperties": false, + "required": [ + "scorer_name" + ] + }, + "EvaluateTaskConfig": { + "type": "object", + "properties": { + "dataset_config": { + "$ref": "#/components/schemas/EvaluateDatasetConfig" + }, + "processor_config": { + "$ref": "#/components/schemas/EvaluateProcessorConfig" + }, + "generation_config": { + "$ref": "#/components/schemas/EvaluateModelGenerationConfig" + }, + "scoring_config": { + "$ref": "#/components/schemas/EvaluateScoringConfig" + } + }, + "additionalProperties": false, + "required": [ + "dataset_config", + "processor_config", + "generation_config", + "scoring_config" + ] + }, + "LLMJudgeConfig": { + "type": "object", + "properties": { + "judge_processor_config": { + "$ref": "#/components/schemas/EvaluateProcessorConfig" + }, + "judge_model_generation_config": { + "$ref": "#/components/schemas/EvaluateModelGenerationConfig" + }, + "judge_scoring_config": { + "$ref": "#/components/schemas/EvaluateJudgeScoringConfig" + } + }, + "additionalProperties": false, + "required": [ + "judge_processor_config", + "judge_model_generation_config", + "judge_scoring_config" + ] + }, + "RunEvalTaskRequest": { + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "task": { + "type": "string" + }, + "dataset": { + "type": "string" + }, + "eval_task_config": { + "$ref": "#/components/schemas/EvaluateTaskConfig" + } + }, + "additionalProperties": false, + "required": [ + "model", + "task" + ] + }, + "EvalResult": { + "type": "object", + "properties": { + "metrics": { + "type": "object", + "additionalProperties": { + "type": "number" + } + } + }, + "additionalProperties": false, + "required": [ + "metrics" + ], + "title": "Aggregated final evaluation result." + }, + "EvaluateResponse": { + "type": "object", + "properties": { + "eval_result": { + "$ref": "#/components/schemas/EvalResult" + }, + "formatted_report": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "eval_result" + ], + "title": "Scores for evaluation." + }, + "RunScorerRequest": { + "type": "object", + "properties": { + "dataset_config": { + "$ref": "#/components/schemas/EvaluateDatasetConfig" + }, + "eval_scoring_config": { + "$ref": "#/components/schemas/EvaluateScoringConfig" + } + }, + "additionalProperties": false, + "required": [ + "dataset_config", + "eval_scoring_config" + ] + }, "RunShieldRequest": { "type": "object", "properties": { @@ -6075,7 +6238,28 @@ ], "tags": [ { - "name": "RewardScoring" + "name": "Models" + }, + { + "name": "BatchInference" + }, + { + "name": "Inspect" + }, + { + "name": "Evals" + }, + { + "name": "Safety" + }, + { + "name": "Shields" + }, + { + "name": "Telemetry" + }, + { + "name": "Agents" }, { "name": "Memory" @@ -6084,37 +6268,16 @@ "name": "SyntheticDataGeneration" }, { - "name": "Models" - }, - { - "name": "Safety" - }, - { - "name": "BatchInference" - }, - { - "name": "Agents" - }, - { - "name": "MemoryBanks" - }, - { - "name": "Shields" + "name": "PostTraining" }, { "name": "Datasets" }, { - "name": "Evaluations" + "name": "MemoryBanks" }, { - "name": "Inspect" - }, - { - "name": "PostTraining" - }, - { - "name": "Telemetry" + "name": "RewardScoring" }, { "name": "Inference" @@ -6195,10 +6358,6 @@ "name": "BatchCompletionResponse", "description": "" }, - { - "name": "CancelEvaluationJobRequest", - "description": "" - }, { "name": "CancelTrainingJobRequest", "description": "" @@ -6368,17 +6527,21 @@ "description": "" }, { - "name": "TrainEvalDataset", - "description": "Dataset to be used for training or evaluating language models.\n\n" + "name": "CustomDatasetDef", + "description": "" }, { - "name": "TrainEvalDatasetColumnType", - "description": "" + "name": "HuggingfaceDatasetDef", + "description": "" }, { "name": "CreateDatasetRequest", "description": "" }, + { + "name": "CreateDatasetResponse", + "description": "" + }, { "name": "DeleteAgentsRequest", "description": "" @@ -6391,6 +6554,10 @@ "name": "DeleteDatasetRequest", "description": "" }, + { + "name": "DeleteDatasetResponse", + "description": "" + }, { "name": "EmbeddingsRequest", "description": "" @@ -6399,22 +6566,6 @@ "name": "EmbeddingsResponse", "description": "" }, - { - "name": "EvaluateQuestionAnsweringRequest", - "description": "" - }, - { - "name": "EvaluationJob", - "description": "" - }, - { - "name": "EvaluateSummarizationRequest", - "description": "" - }, - { - "name": "EvaluateTextGenerationRequest", - "description": "" - }, { "name": "GetAgentsSessionRequest", "description": "" @@ -6443,18 +6594,6 @@ "name": "AgentStepResponse", "description": "" }, - { - "name": "EvaluationJobArtifactsResponse", - "description": "Artifacts of a evaluation job.\n\n" - }, - { - "name": "EvaluationJobLogStream", - "description": "" - }, - { - "name": "EvaluationJobStatusResponse", - "description": "" - }, { "name": "ModelDefWithProvider", "description": "" @@ -6555,6 +6694,14 @@ "name": "RLHFAlgorithm", "description": "" }, + { + "name": "TrainEvalDataset", + "description": "Dataset to be used for training or evaluating language models.\n\n" + }, + { + "name": "TrainEvalDatasetColumnType", + "description": "" + }, { "name": "TrainingConfig", "description": "" @@ -6603,6 +6750,62 @@ "name": "ScoredMessage", "description": "" }, + { + "name": "EvaluateDatasetConfig", + "description": "" + }, + { + "name": "EvaluateJudgeScoringConfig", + "description": "" + }, + { + "name": "EvaluateModelGenerationConfig", + "description": "" + }, + { + "name": "EvaluatePostprocessConfig", + "description": "" + }, + { + "name": "EvaluatePreprocessConfig", + "description": "" + }, + { + "name": "EvaluateProcessorConfig", + "description": "" + }, + { + "name": "EvaluateScoringConfig", + "description": "" + }, + { + "name": "EvaluateSingleScorerConfig", + "description": "" + }, + { + "name": "EvaluateTaskConfig", + "description": "" + }, + { + "name": "LLMJudgeConfig", + "description": "" + }, + { + "name": "RunEvalTaskRequest", + "description": "" + }, + { + "name": "EvalResult", + "description": "Aggregated final evaluation result.\n\n" + }, + { + "name": "EvaluateResponse", + "description": "Scores for evaluation.\n\n" + }, + { + "name": "RunScorerRequest", + "description": "" + }, { "name": "RunShieldRequest", "description": "" @@ -6647,7 +6850,7 @@ "Agents", "BatchInference", "Datasets", - "Evaluations", + "Evals", "Inference", "Inspect", "Memory", @@ -6681,7 +6884,6 @@ "BatchCompletionRequest", "BatchCompletionResponse", "BuiltinTool", - "CancelEvaluationJobRequest", "CancelTrainingJobRequest", "ChatCompletionRequest", "ChatCompletionResponse", @@ -6698,31 +6900,40 @@ "CreateAgentSessionRequest", "CreateAgentTurnRequest", "CreateDatasetRequest", + "CreateDatasetResponse", + "CustomDatasetDef", "DPOAlignmentConfig", "DeleteAgentsRequest", "DeleteAgentsSessionRequest", "DeleteDatasetRequest", + "DeleteDatasetResponse", "DialogGenerations", "DoraFinetuningConfig", "EmbeddingsRequest", "EmbeddingsResponse", - "EvaluateQuestionAnsweringRequest", - "EvaluateSummarizationRequest", - "EvaluateTextGenerationRequest", - "EvaluationJob", - "EvaluationJobArtifactsResponse", - "EvaluationJobLogStream", - "EvaluationJobStatusResponse", + "EvalResult", + "EvaluateDatasetConfig", + "EvaluateJudgeScoringConfig", + "EvaluateModelGenerationConfig", + "EvaluatePostprocessConfig", + "EvaluatePreprocessConfig", + "EvaluateProcessorConfig", + "EvaluateResponse", + "EvaluateScoringConfig", + "EvaluateSingleScorerConfig", + "EvaluateTaskConfig", "FinetuningAlgorithm", "FunctionCallToolDefinition", "GetAgentsSessionRequest", "GraphMemoryBankDef", "HealthInfo", + "HuggingfaceDatasetDef", "ImageMedia", "InferenceStep", "InsertDocumentsRequest", "KeyValueMemoryBankDef", "KeywordMemoryBankDef", + "LLMJudgeConfig", "LogEventRequest", "LogSeverity", "LoraFinetuningConfig", @@ -6752,6 +6963,8 @@ "RewardScoreRequest", "RewardScoringResponse", "RouteInfo", + "RunEvalTaskRequest", + "RunScorerRequest", "RunShieldRequest", "RunShieldResponse", "SafetyViolation", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 9307ee47b..ab54c4c09 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -315,14 +315,6 @@ components: - photogen - code_interpreter type: string - CancelEvaluationJobRequest: - additionalProperties: false - properties: - job_uuid: - type: string - required: - - job_uuid - type: object CancelTrainingJobRequest: additionalProperties: false properties: @@ -572,13 +564,45 @@ components: CreateDatasetRequest: additionalProperties: false properties: - dataset: - $ref: '#/components/schemas/TrainEvalDataset' - uuid: + dataset_def: + oneOf: + - $ref: '#/components/schemas/HuggingfaceDatasetDef' + - $ref: '#/components/schemas/CustomDatasetDef' + required: + - dataset_def + type: object + CreateDatasetResponse: + additionalProperties: false + properties: + msg: + type: string + status: + enum: + - success + - fail type: string required: - - uuid - - dataset + - status + type: object + CustomDatasetDef: + additionalProperties: false + properties: + identifier: + type: string + rename_columns_map: + additionalProperties: + type: string + type: object + type: + const: custom + default: custom + type: string + url: + type: string + required: + - type + - identifier + - url type: object DPOAlignmentConfig: additionalProperties: false @@ -619,10 +643,23 @@ components: DeleteDatasetRequest: additionalProperties: false properties: - dataset_uuid: + dataset_identifier: type: string required: - - dataset_uuid + - dataset_identifier + type: object + DeleteDatasetResponse: + additionalProperties: false + properties: + msg: + type: string + status: + enum: + - success + - fail + type: string + required: + - status type: object DialogGenerations: additionalProperties: false @@ -701,78 +738,147 @@ components: required: - embeddings type: object - EvaluateQuestionAnsweringRequest: + EvalResult: additionalProperties: false properties: metrics: - items: - enum: - - em - - f1 - type: string - type: array + additionalProperties: + type: number + type: object required: - metrics + title: Aggregated final evaluation result. type: object - EvaluateSummarizationRequest: + EvaluateDatasetConfig: additionalProperties: false properties: - metrics: + dataset_identifier: + type: string + kwargs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + row_limit: + type: integer + required: + - dataset_identifier + type: object + EvaluateJudgeScoringConfig: + type: object + EvaluateModelGenerationConfig: + additionalProperties: false + properties: + kwargs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + model: + type: string + sampling_params: + $ref: '#/components/schemas/SamplingParams' + required: + - model + - sampling_params + type: object + EvaluatePostprocessConfig: + additionalProperties: false + properties: + kwargs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + type: object + EvaluatePreprocessConfig: + additionalProperties: false + properties: + kwargs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + type: object + EvaluateProcessorConfig: + additionalProperties: false + properties: + postprocess_config: + $ref: '#/components/schemas/EvaluatePostprocessConfig' + preprocess_config: + $ref: '#/components/schemas/EvaluatePreprocessConfig' + processor_identifier: + type: string + required: + - processor_identifier + type: object + EvaluateResponse: + additionalProperties: false + properties: + eval_result: + $ref: '#/components/schemas/EvalResult' + formatted_report: + type: string + required: + - eval_result + title: Scores for evaluation. + type: object + EvaluateScoringConfig: + additionalProperties: false + properties: + scorer_config_list: items: - enum: - - rouge - - bleu - type: string + $ref: '#/components/schemas/EvaluateSingleScorerConfig' type: array required: - - metrics + - scorer_config_list type: object - EvaluateTextGenerationRequest: + EvaluateSingleScorerConfig: additionalProperties: false properties: - metrics: - items: - enum: - - perplexity - - rouge - - bleu - type: string - type: array - required: - - metrics - type: object - EvaluationJob: - additionalProperties: false - properties: - job_uuid: + llm_judge_config: + $ref: '#/components/schemas/LLMJudgeConfig' + scorer_name: type: string required: - - job_uuid + - scorer_name type: object - EvaluationJobArtifactsResponse: + EvaluateTaskConfig: additionalProperties: false properties: - job_uuid: - type: string + dataset_config: + $ref: '#/components/schemas/EvaluateDatasetConfig' + generation_config: + $ref: '#/components/schemas/EvaluateModelGenerationConfig' + processor_config: + $ref: '#/components/schemas/EvaluateProcessorConfig' + scoring_config: + $ref: '#/components/schemas/EvaluateScoringConfig' required: - - job_uuid - title: Artifacts of a evaluation job. - type: object - EvaluationJobLogStream: - additionalProperties: false - properties: - job_uuid: - type: string - required: - - job_uuid - type: object - EvaluationJobStatusResponse: - additionalProperties: false - properties: - job_uuid: - type: string - required: - - job_uuid + - dataset_config + - processor_config + - generation_config + - scoring_config type: object FinetuningAlgorithm: enum: @@ -845,6 +951,39 @@ components: required: - status type: object + HuggingfaceDatasetDef: + additionalProperties: false + properties: + dataset_name: + type: string + dataset_path: + type: string + identifier: + type: string + kwargs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + rename_columns_map: + additionalProperties: + type: string + type: object + type: + const: huggingface + default: huggingface + type: string + required: + - type + - identifier + - dataset_path + - kwargs + type: object ImageMedia: additionalProperties: false properties: @@ -936,6 +1075,20 @@ components: - provider_id - type type: object + LLMJudgeConfig: + additionalProperties: false + properties: + judge_model_generation_config: + $ref: '#/components/schemas/EvaluateModelGenerationConfig' + judge_processor_config: + $ref: '#/components/schemas/EvaluateProcessorConfig' + judge_scoring_config: + $ref: '#/components/schemas/EvaluateJudgeScoringConfig' + required: + - judge_processor_config + - judge_model_generation_config + - judge_scoring_config + type: object LogEventRequest: additionalProperties: false properties: @@ -1629,6 +1782,32 @@ components: - method - provider_types type: object + RunEvalTaskRequest: + additionalProperties: false + properties: + dataset: + type: string + eval_task_config: + $ref: '#/components/schemas/EvaluateTaskConfig' + model: + type: string + task: + type: string + required: + - model + - task + type: object + RunScorerRequest: + additionalProperties: false + properties: + dataset_config: + $ref: '#/components/schemas/EvaluateDatasetConfig' + eval_scoring_config: + $ref: '#/components/schemas/EvaluateScoringConfig' + required: + - dataset_config + - eval_scoring_config + type: object RunShieldRequest: additionalProperties: false properties: @@ -2507,7 +2686,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-10-09 21:10:09.073430" + \ draft and subject to change.\n Generated at 2024-10-15 00:44:26.278642" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -2693,7 +2872,7 @@ paths: responses: '200': content: - application/json: + text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' description: OK @@ -2796,6 +2975,10 @@ paths: required: true responses: '200': + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDatasetResponse' description: OK tags: - Datasets @@ -2817,6 +3000,10 @@ paths: required: true responses: '200': + content: + application/json: + schema: + $ref: '#/components/schemas/DeleteDatasetResponse' description: OK tags: - Datasets @@ -2824,7 +3011,7 @@ paths: get: parameters: - in: query - name: dataset_uuid + name: dataset_identifier required: true schema: type: string @@ -2840,104 +3027,15 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/TrainEvalDataset' + oneOf: + - oneOf: + - $ref: '#/components/schemas/HuggingfaceDatasetDef' + - $ref: '#/components/schemas/CustomDatasetDef' + - type: 'null' description: OK tags: - Datasets - /evaluate/job/artifacts: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluationJobArtifactsResponse' - description: OK - tags: - - Evaluations - /evaluate/job/cancel: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CancelEvaluationJobRequest' - required: true - responses: - '200': - description: OK - tags: - - Evaluations - /evaluate/job/logs: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluationJobLogStream' - description: OK - tags: - - Evaluations - /evaluate/job/status: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluationJobStatusResponse' - description: OK - tags: - - Evaluations - /evaluate/jobs: + /datasets/list: get: parameters: - description: JSON-encoded provider data which will be made available to the @@ -2952,11 +3050,13 @@ paths: content: application/jsonl: schema: - $ref: '#/components/schemas/EvaluationJob' + oneOf: + - $ref: '#/components/schemas/HuggingfaceDatasetDef' + - $ref: '#/components/schemas/CustomDatasetDef' description: OK tags: - - Evaluations - /evaluate/question_answering/: + - Datasets + /evals/run_eval_task: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -2970,18 +3070,18 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest' + $ref: '#/components/schemas/RunEvalTaskRequest' required: true responses: '200': content: application/json: schema: - $ref: '#/components/schemas/EvaluationJob' + $ref: '#/components/schemas/EvaluateResponse' description: OK tags: - - Evaluations - /evaluate/summarization/: + - Evals + /evals/run_scorer: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -2995,42 +3095,17 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/EvaluateSummarizationRequest' + $ref: '#/components/schemas/RunScorerRequest' required: true responses: '200': content: application/json: schema: - $ref: '#/components/schemas/EvaluationJob' + $ref: '#/components/schemas/EvaluateResponse' description: OK tags: - - Evaluations - /evaluate/text_generation/: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluateTextGenerationRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/EvaluationJob' - description: OK - tags: - - Evaluations + - Evals /health: get: parameters: @@ -3712,20 +3787,20 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: RewardScoring +- name: Models +- name: BatchInference +- name: Inspect +- name: Evals +- name: Safety +- name: Shields +- name: Telemetry +- name: Agents - name: Memory - name: SyntheticDataGeneration -- name: Models -- name: Safety -- name: BatchInference -- name: Agents -- name: MemoryBanks -- name: Shields -- name: Datasets -- name: Evaluations -- name: Inspect - name: PostTraining -- name: Telemetry +- name: Datasets +- name: MemoryBanks +- name: RewardScoring - name: Inference - description: name: BuiltinTool @@ -3782,9 +3857,6 @@ tags: - description: name: BatchCompletionResponse -- description: - name: CancelEvaluationJobRequest - description: name: CancelTrainingJobRequest @@ -3919,17 +3991,18 @@ tags: name: Turn - description: name: ViolationLevel -- description: 'Dataset to be used for training or evaluating language models. - - - ' - name: TrainEvalDataset -- description: - name: TrainEvalDatasetColumnType + name: CustomDatasetDef +- description: + name: HuggingfaceDatasetDef - description: name: CreateDatasetRequest +- description: + name: CreateDatasetResponse - description: name: DeleteAgentsRequest @@ -3939,23 +4012,15 @@ tags: - description: name: DeleteDatasetRequest +- description: + name: DeleteDatasetResponse - description: name: EmbeddingsRequest - description: name: EmbeddingsResponse -- description: - name: EvaluateQuestionAnsweringRequest -- description: - name: EvaluationJob -- description: - name: EvaluateSummarizationRequest -- description: - name: EvaluateTextGenerationRequest - description: name: GetAgentsSessionRequest @@ -3979,18 +4044,6 @@ tags: - description: name: AgentStepResponse -- description: 'Artifacts of a evaluation job. - - - ' - name: EvaluationJobArtifactsResponse -- description: - name: EvaluationJobLogStream -- description: - name: EvaluationJobStatusResponse - description: name: ModelDefWithProvider @@ -4067,6 +4120,14 @@ tags: name: OptimizerConfig - description: name: RLHFAlgorithm +- description: 'Dataset to be used for training or evaluating language models. + + + ' + name: TrainEvalDataset +- description: + name: TrainEvalDatasetColumnType - description: name: TrainingConfig - description: name: ScoredMessage +- description: + name: EvaluateDatasetConfig +- description: + name: EvaluateJudgeScoringConfig +- description: + name: EvaluateModelGenerationConfig +- description: + name: EvaluatePostprocessConfig +- description: + name: EvaluatePreprocessConfig +- description: + name: EvaluateProcessorConfig +- description: + name: EvaluateScoringConfig +- description: + name: EvaluateSingleScorerConfig +- description: + name: EvaluateTaskConfig +- description: + name: LLMJudgeConfig +- description: + name: RunEvalTaskRequest +- description: 'Aggregated final evaluation result. + + + ' + name: EvalResult +- description: 'Scores for evaluation. + + + ' + name: EvaluateResponse +- description: + name: RunScorerRequest - description: name: RunShieldRequest @@ -4141,7 +4247,7 @@ x-tagGroups: - Agents - BatchInference - Datasets - - Evaluations + - Evals - Inference - Inspect - Memory @@ -4172,7 +4278,6 @@ x-tagGroups: - BatchCompletionRequest - BatchCompletionResponse - BuiltinTool - - CancelEvaluationJobRequest - CancelTrainingJobRequest - ChatCompletionRequest - ChatCompletionResponse @@ -4189,31 +4294,40 @@ x-tagGroups: - CreateAgentSessionRequest - CreateAgentTurnRequest - CreateDatasetRequest + - CreateDatasetResponse + - CustomDatasetDef - DPOAlignmentConfig - DeleteAgentsRequest - DeleteAgentsSessionRequest - DeleteDatasetRequest + - DeleteDatasetResponse - DialogGenerations - DoraFinetuningConfig - EmbeddingsRequest - EmbeddingsResponse - - EvaluateQuestionAnsweringRequest - - EvaluateSummarizationRequest - - EvaluateTextGenerationRequest - - EvaluationJob - - EvaluationJobArtifactsResponse - - EvaluationJobLogStream - - EvaluationJobStatusResponse + - EvalResult + - EvaluateDatasetConfig + - EvaluateJudgeScoringConfig + - EvaluateModelGenerationConfig + - EvaluatePostprocessConfig + - EvaluatePreprocessConfig + - EvaluateProcessorConfig + - EvaluateResponse + - EvaluateScoringConfig + - EvaluateSingleScorerConfig + - EvaluateTaskConfig - FinetuningAlgorithm - FunctionCallToolDefinition - GetAgentsSessionRequest - GraphMemoryBankDef - HealthInfo + - HuggingfaceDatasetDef - ImageMedia - InferenceStep - InsertDocumentsRequest - KeyValueMemoryBankDef - KeywordMemoryBankDef + - LLMJudgeConfig - LogEventRequest - LogSeverity - LoraFinetuningConfig @@ -4243,6 +4357,8 @@ x-tagGroups: - RewardScoreRequest - RewardScoringResponse - RouteInfo + - RunEvalTaskRequest + - RunScorerRequest - RunShieldRequest - RunShieldResponse - SafetyViolation diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index c0aa4d161..f5991c52e 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -15,6 +15,26 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated +@json_schema_type +class TrainEvalDatasetColumnType(Enum): + dialog = "dialog" + text = "text" + media = "media" + number = "number" + json = "json" + + +@json_schema_type +class TrainEvalDataset(BaseModel): + """Dataset to be used for training or evaluating language models.""" + + # TODO(ashwin): figure out if we need to add an enum for a "dataset type" + + columns: Dict[str, TrainEvalDatasetColumnType] + content_url: URL + metadata: Optional[Dict[str, Any]] = None + + @json_schema_type class GenerationInput(BaseModel): messages: List[Message] diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index d943f48b2..cdfe5c467 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -14,7 +14,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.apis.dataset import * # noqa: F403 +from llama_stack.apis.datasets import * # noqa: F403 from llama_stack.apis.common.training_types import * # noqa: F403