diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 871c01a80..994b06e58 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -33,7 +33,7 @@ schema_utils.json_schema_type = json_schema_type
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.agents import * # noqa: F403
-from llama_stack.apis.dataset import * # noqa: F403
+from llama_stack.apis.datasets import * # noqa: F403
from llama_stack.apis.evals import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.apis.batch_inference import * # noqa: F403
@@ -61,7 +61,7 @@ class LlamaStack(
Telemetry,
PostTraining,
Memory,
- Evaluations,
+ Evals,
Models,
Shields,
Inspect,
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 96ef7e4bb..ac75dbf04 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-09 21:10:09.073430"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-15 00:44:26.278642"
},
"servers": [
{
@@ -109,39 +109,6 @@
}
}
},
- "/evaluate/job/cancel": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/CancelEvaluationJobRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/post_training/job/cancel": {
"post": {
"responses": {
@@ -355,7 +322,7 @@
"200": {
"description": "OK",
"content": {
- "application/json": {
+ "text/event-stream": {
"schema": {
"$ref": "#/components/schemas/AgentTurnResponseStreamChunk"
}
@@ -393,7 +360,14 @@
"post": {
"responses": {
"200": {
- "description": "OK"
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/CreateDatasetResponse"
+ }
+ }
+ }
}
},
"tags": [
@@ -492,7 +466,14 @@
"post": {
"responses": {
"200": {
- "description": "OK"
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/DeleteDatasetResponse"
+ }
+ }
+ }
}
},
"tags": [
@@ -561,126 +542,6 @@
}
}
},
- "/evaluate/question_answering/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/summarization/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateSummarizationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/text_generation/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateTextGenerationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/agents/session/get": {
"post": {
"responses": {
@@ -845,7 +706,21 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "oneOf": [
+ {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/HuggingfaceDatasetDef"
+ },
+ {
+ "$ref": "#/components/schemas/CustomDatasetDef"
+ }
+ ]
+ },
+ {
+ "type": "null"
+ }
+ ]
}
}
}
@@ -856,7 +731,7 @@
],
"parameters": [
{
- "name": "dataset_uuid",
+ "name": "dataset_identifier",
"in": "query",
"required": true,
"schema": {
@@ -875,150 +750,6 @@
]
}
},
- "/evaluate/job/artifacts": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobArtifactsResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/job/logs": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobLogStream"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/job/status": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobStatusResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/jobs": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/jsonl": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
"/memory_banks/get": {
"get": {
"responses": {
@@ -1412,6 +1143,43 @@
}
}
},
+ "/datasets/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/HuggingfaceDatasetDef"
+ },
+ {
+ "$ref": "#/components/schemas/CustomDatasetDef"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Datasets"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/memory_banks/list": {
"get": {
"responses": {
@@ -1836,6 +1604,86 @@
}
}
},
+ "/evals/run_eval_task": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Evals"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RunEvalTaskRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/evals/run_scorer": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Evals"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RunScorerRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/safety/run_shield": {
"post": {
"responses": {
@@ -2571,18 +2419,6 @@
"completion_message_batch"
]
},
- "CancelEvaluationJobRequest": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
@@ -4090,19 +3926,58 @@
"error"
]
},
- "TrainEvalDataset": {
+ "CustomDatasetDef": {
"type": "object",
"properties": {
- "columns": {
+ "type": {
+ "type": "string",
+ "const": "custom",
+ "default": "custom"
+ },
+ "identifier": {
+ "type": "string"
+ },
+ "url": {
+ "type": "string"
+ },
+ "rename_columns_map": {
"type": "object",
"additionalProperties": {
- "$ref": "#/components/schemas/TrainEvalDatasetColumnType"
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "identifier",
+ "url"
+ ]
+ },
+ "HuggingfaceDatasetDef": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "huggingface",
+ "default": "huggingface"
+ },
+ "identifier": {
+ "type": "string"
+ },
+ "dataset_path": {
+ "type": "string"
+ },
+ "dataset_name": {
+ "type": "string"
+ },
+ "rename_columns_map": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string"
}
},
- "content_url": {
- "$ref": "#/components/schemas/URL"
- },
- "metadata": {
+ "kwargs": {
"type": "object",
"additionalProperties": {
"oneOf": [
@@ -4130,35 +4005,48 @@
},
"additionalProperties": false,
"required": [
- "columns",
- "content_url"
- ],
- "title": "Dataset to be used for training or evaluating language models."
- },
- "TrainEvalDatasetColumnType": {
- "type": "string",
- "enum": [
- "dialog",
- "text",
- "media",
- "number",
- "json"
+ "type",
+ "identifier",
+ "dataset_path",
+ "kwargs"
]
},
"CreateDatasetRequest": {
"type": "object",
"properties": {
- "uuid": {
- "type": "string"
- },
- "dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "dataset_def": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/HuggingfaceDatasetDef"
+ },
+ {
+ "$ref": "#/components/schemas/CustomDatasetDef"
+ }
+ ]
}
},
"additionalProperties": false,
"required": [
- "uuid",
- "dataset"
+ "dataset_def"
+ ]
+ },
+ "CreateDatasetResponse": {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "success",
+ "fail"
+ ]
+ },
+ "msg": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "status"
]
},
"DeleteAgentsRequest": {
@@ -4192,13 +4080,32 @@
"DeleteDatasetRequest": {
"type": "object",
"properties": {
- "dataset_uuid": {
+ "dataset_identifier": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "dataset_uuid"
+ "dataset_identifier"
+ ]
+ },
+ "DeleteDatasetResponse": {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "success",
+ "fail"
+ ]
+ },
+ "msg": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "status"
]
},
"EmbeddingsRequest": {
@@ -4258,76 +4165,6 @@
"embeddings"
]
},
- "EvaluateQuestionAnsweringRequest": {
- "type": "object",
- "properties": {
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "em",
- "f1"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "metrics"
- ]
- },
- "EvaluationJob": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
- "EvaluateSummarizationRequest": {
- "type": "object",
- "properties": {
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "metrics"
- ]
- },
- "EvaluateTextGenerationRequest": {
- "type": "object",
- "properties": {
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "perplexity",
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "metrics"
- ]
- },
"GetAgentsSessionRequest": {
"type": "object",
"properties": {
@@ -4513,43 +4350,6 @@
"step"
]
},
- "EvaluationJobArtifactsResponse": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ],
- "title": "Artifacts of a evaluation job."
- },
- "EvaluationJobLogStream": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
- "EvaluationJobStatusResponse": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
"ModelDefWithProvider": {
"type": "object",
"properties": {
@@ -5265,6 +5065,61 @@
"dpo"
]
},
+ "TrainEvalDataset": {
+ "type": "object",
+ "properties": {
+ "columns": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/TrainEvalDatasetColumnType"
+ }
+ },
+ "content_url": {
+ "$ref": "#/components/schemas/URL"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "columns",
+ "content_url"
+ ],
+ "title": "Dataset to be used for training or evaluating language models."
+ },
+ "TrainEvalDatasetColumnType": {
+ "type": "string",
+ "enum": [
+ "dialog",
+ "text",
+ "media",
+ "number",
+ "json"
+ ]
+ },
"TrainingConfig": {
"type": "object",
"properties": {
@@ -5709,6 +5564,314 @@
"score"
]
},
+ "EvaluateDatasetConfig": {
+ "type": "object",
+ "properties": {
+ "dataset_identifier": {
+ "type": "string"
+ },
+ "row_limit": {
+ "type": "integer"
+ },
+ "kwargs": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_identifier"
+ ]
+ },
+ "EvaluateJudgeScoringConfig": {
+ "type": "object"
+ },
+ "EvaluateModelGenerationConfig": {
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "kwargs": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "model",
+ "sampling_params"
+ ]
+ },
+ "EvaluatePostprocessConfig": {
+ "type": "object",
+ "properties": {
+ "kwargs": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false
+ },
+ "EvaluatePreprocessConfig": {
+ "type": "object",
+ "properties": {
+ "kwargs": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false
+ },
+ "EvaluateProcessorConfig": {
+ "type": "object",
+ "properties": {
+ "processor_identifier": {
+ "type": "string"
+ },
+ "preprocess_config": {
+ "$ref": "#/components/schemas/EvaluatePreprocessConfig"
+ },
+ "postprocess_config": {
+ "$ref": "#/components/schemas/EvaluatePostprocessConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "processor_identifier"
+ ]
+ },
+ "EvaluateScoringConfig": {
+ "type": "object",
+ "properties": {
+ "scorer_config_list": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/EvaluateSingleScorerConfig"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "scorer_config_list"
+ ]
+ },
+ "EvaluateSingleScorerConfig": {
+ "type": "object",
+ "properties": {
+ "scorer_name": {
+ "type": "string"
+ },
+ "llm_judge_config": {
+ "$ref": "#/components/schemas/LLMJudgeConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "scorer_name"
+ ]
+ },
+ "EvaluateTaskConfig": {
+ "type": "object",
+ "properties": {
+ "dataset_config": {
+ "$ref": "#/components/schemas/EvaluateDatasetConfig"
+ },
+ "processor_config": {
+ "$ref": "#/components/schemas/EvaluateProcessorConfig"
+ },
+ "generation_config": {
+ "$ref": "#/components/schemas/EvaluateModelGenerationConfig"
+ },
+ "scoring_config": {
+ "$ref": "#/components/schemas/EvaluateScoringConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_config",
+ "processor_config",
+ "generation_config",
+ "scoring_config"
+ ]
+ },
+ "LLMJudgeConfig": {
+ "type": "object",
+ "properties": {
+ "judge_processor_config": {
+ "$ref": "#/components/schemas/EvaluateProcessorConfig"
+ },
+ "judge_model_generation_config": {
+ "$ref": "#/components/schemas/EvaluateModelGenerationConfig"
+ },
+ "judge_scoring_config": {
+ "$ref": "#/components/schemas/EvaluateJudgeScoringConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "judge_processor_config",
+ "judge_model_generation_config",
+ "judge_scoring_config"
+ ]
+ },
+ "RunEvalTaskRequest": {
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string"
+ },
+ "task": {
+ "type": "string"
+ },
+ "dataset": {
+ "type": "string"
+ },
+ "eval_task_config": {
+ "$ref": "#/components/schemas/EvaluateTaskConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "model",
+ "task"
+ ]
+ },
+ "EvalResult": {
+ "type": "object",
+ "properties": {
+ "metrics": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "number"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "metrics"
+ ],
+ "title": "Aggregated final evaluation result."
+ },
+ "EvaluateResponse": {
+ "type": "object",
+ "properties": {
+ "eval_result": {
+ "$ref": "#/components/schemas/EvalResult"
+ },
+ "formatted_report": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "eval_result"
+ ],
+ "title": "Scores for evaluation."
+ },
+ "RunScorerRequest": {
+ "type": "object",
+ "properties": {
+ "dataset_config": {
+ "$ref": "#/components/schemas/EvaluateDatasetConfig"
+ },
+ "eval_scoring_config": {
+ "$ref": "#/components/schemas/EvaluateScoringConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_config",
+ "eval_scoring_config"
+ ]
+ },
"RunShieldRequest": {
"type": "object",
"properties": {
@@ -6075,7 +6238,28 @@
],
"tags": [
{
- "name": "RewardScoring"
+ "name": "Models"
+ },
+ {
+ "name": "BatchInference"
+ },
+ {
+ "name": "Inspect"
+ },
+ {
+ "name": "Evals"
+ },
+ {
+ "name": "Safety"
+ },
+ {
+ "name": "Shields"
+ },
+ {
+ "name": "Telemetry"
+ },
+ {
+ "name": "Agents"
},
{
"name": "Memory"
@@ -6084,37 +6268,16 @@
"name": "SyntheticDataGeneration"
},
{
- "name": "Models"
- },
- {
- "name": "Safety"
- },
- {
- "name": "BatchInference"
- },
- {
- "name": "Agents"
- },
- {
- "name": "MemoryBanks"
- },
- {
- "name": "Shields"
+ "name": "PostTraining"
},
{
"name": "Datasets"
},
{
- "name": "Evaluations"
+ "name": "MemoryBanks"
},
{
- "name": "Inspect"
- },
- {
- "name": "PostTraining"
- },
- {
- "name": "Telemetry"
+ "name": "RewardScoring"
},
{
"name": "Inference"
@@ -6195,10 +6358,6 @@
"name": "BatchCompletionResponse",
"description": ""
},
- {
- "name": "CancelEvaluationJobRequest",
- "description": ""
- },
{
"name": "CancelTrainingJobRequest",
"description": ""
@@ -6368,17 +6527,21 @@
"description": ""
},
{
- "name": "TrainEvalDataset",
- "description": "Dataset to be used for training or evaluating language models.\n\n"
+ "name": "CustomDatasetDef",
+ "description": ""
},
{
- "name": "TrainEvalDatasetColumnType",
- "description": ""
+ "name": "HuggingfaceDatasetDef",
+ "description": ""
},
{
"name": "CreateDatasetRequest",
"description": ""
},
+ {
+ "name": "CreateDatasetResponse",
+ "description": ""
+ },
{
"name": "DeleteAgentsRequest",
"description": ""
@@ -6391,6 +6554,10 @@
"name": "DeleteDatasetRequest",
"description": ""
},
+ {
+ "name": "DeleteDatasetResponse",
+ "description": ""
+ },
{
"name": "EmbeddingsRequest",
"description": ""
@@ -6399,22 +6566,6 @@
"name": "EmbeddingsResponse",
"description": ""
},
- {
- "name": "EvaluateQuestionAnsweringRequest",
- "description": ""
- },
- {
- "name": "EvaluationJob",
- "description": ""
- },
- {
- "name": "EvaluateSummarizationRequest",
- "description": ""
- },
- {
- "name": "EvaluateTextGenerationRequest",
- "description": ""
- },
{
"name": "GetAgentsSessionRequest",
"description": ""
@@ -6443,18 +6594,6 @@
"name": "AgentStepResponse",
"description": ""
},
- {
- "name": "EvaluationJobArtifactsResponse",
- "description": "Artifacts of a evaluation job.\n\n"
- },
- {
- "name": "EvaluationJobLogStream",
- "description": ""
- },
- {
- "name": "EvaluationJobStatusResponse",
- "description": ""
- },
{
"name": "ModelDefWithProvider",
"description": ""
@@ -6555,6 +6694,14 @@
"name": "RLHFAlgorithm",
"description": ""
},
+ {
+ "name": "TrainEvalDataset",
+ "description": "Dataset to be used for training or evaluating language models.\n\n"
+ },
+ {
+ "name": "TrainEvalDatasetColumnType",
+ "description": ""
+ },
{
"name": "TrainingConfig",
"description": ""
@@ -6603,6 +6750,62 @@
"name": "ScoredMessage",
"description": ""
},
+ {
+ "name": "EvaluateDatasetConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateJudgeScoringConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateModelGenerationConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluatePostprocessConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluatePreprocessConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateProcessorConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateScoringConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateSingleScorerConfig",
+ "description": ""
+ },
+ {
+ "name": "EvaluateTaskConfig",
+ "description": ""
+ },
+ {
+ "name": "LLMJudgeConfig",
+ "description": ""
+ },
+ {
+ "name": "RunEvalTaskRequest",
+ "description": ""
+ },
+ {
+ "name": "EvalResult",
+ "description": "Aggregated final evaluation result.\n\n"
+ },
+ {
+ "name": "EvaluateResponse",
+ "description": "Scores for evaluation.\n\n"
+ },
+ {
+ "name": "RunScorerRequest",
+ "description": ""
+ },
{
"name": "RunShieldRequest",
"description": ""
@@ -6647,7 +6850,7 @@
"Agents",
"BatchInference",
"Datasets",
- "Evaluations",
+ "Evals",
"Inference",
"Inspect",
"Memory",
@@ -6681,7 +6884,6 @@
"BatchCompletionRequest",
"BatchCompletionResponse",
"BuiltinTool",
- "CancelEvaluationJobRequest",
"CancelTrainingJobRequest",
"ChatCompletionRequest",
"ChatCompletionResponse",
@@ -6698,31 +6900,40 @@
"CreateAgentSessionRequest",
"CreateAgentTurnRequest",
"CreateDatasetRequest",
+ "CreateDatasetResponse",
+ "CustomDatasetDef",
"DPOAlignmentConfig",
"DeleteAgentsRequest",
"DeleteAgentsSessionRequest",
"DeleteDatasetRequest",
+ "DeleteDatasetResponse",
"DialogGenerations",
"DoraFinetuningConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
- "EvaluateQuestionAnsweringRequest",
- "EvaluateSummarizationRequest",
- "EvaluateTextGenerationRequest",
- "EvaluationJob",
- "EvaluationJobArtifactsResponse",
- "EvaluationJobLogStream",
- "EvaluationJobStatusResponse",
+ "EvalResult",
+ "EvaluateDatasetConfig",
+ "EvaluateJudgeScoringConfig",
+ "EvaluateModelGenerationConfig",
+ "EvaluatePostprocessConfig",
+ "EvaluatePreprocessConfig",
+ "EvaluateProcessorConfig",
+ "EvaluateResponse",
+ "EvaluateScoringConfig",
+ "EvaluateSingleScorerConfig",
+ "EvaluateTaskConfig",
"FinetuningAlgorithm",
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
"GraphMemoryBankDef",
"HealthInfo",
+ "HuggingfaceDatasetDef",
"ImageMedia",
"InferenceStep",
"InsertDocumentsRequest",
"KeyValueMemoryBankDef",
"KeywordMemoryBankDef",
+ "LLMJudgeConfig",
"LogEventRequest",
"LogSeverity",
"LoraFinetuningConfig",
@@ -6752,6 +6963,8 @@
"RewardScoreRequest",
"RewardScoringResponse",
"RouteInfo",
+ "RunEvalTaskRequest",
+ "RunScorerRequest",
"RunShieldRequest",
"RunShieldResponse",
"SafetyViolation",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 9307ee47b..ab54c4c09 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -315,14 +315,6 @@ components:
- photogen
- code_interpreter
type: string
- CancelEvaluationJobRequest:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- type: object
CancelTrainingJobRequest:
additionalProperties: false
properties:
@@ -572,13 +564,45 @@ components:
CreateDatasetRequest:
additionalProperties: false
properties:
- dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
- uuid:
+ dataset_def:
+ oneOf:
+ - $ref: '#/components/schemas/HuggingfaceDatasetDef'
+ - $ref: '#/components/schemas/CustomDatasetDef'
+ required:
+ - dataset_def
+ type: object
+ CreateDatasetResponse:
+ additionalProperties: false
+ properties:
+ msg:
+ type: string
+ status:
+ enum:
+ - success
+ - fail
type: string
required:
- - uuid
- - dataset
+ - status
+ type: object
+ CustomDatasetDef:
+ additionalProperties: false
+ properties:
+ identifier:
+ type: string
+ rename_columns_map:
+ additionalProperties:
+ type: string
+ type: object
+ type:
+ const: custom
+ default: custom
+ type: string
+ url:
+ type: string
+ required:
+ - type
+ - identifier
+ - url
type: object
DPOAlignmentConfig:
additionalProperties: false
@@ -619,10 +643,23 @@ components:
DeleteDatasetRequest:
additionalProperties: false
properties:
- dataset_uuid:
+ dataset_identifier:
type: string
required:
- - dataset_uuid
+ - dataset_identifier
+ type: object
+ DeleteDatasetResponse:
+ additionalProperties: false
+ properties:
+ msg:
+ type: string
+ status:
+ enum:
+ - success
+ - fail
+ type: string
+ required:
+ - status
type: object
DialogGenerations:
additionalProperties: false
@@ -701,78 +738,147 @@ components:
required:
- embeddings
type: object
- EvaluateQuestionAnsweringRequest:
+ EvalResult:
additionalProperties: false
properties:
metrics:
- items:
- enum:
- - em
- - f1
- type: string
- type: array
+ additionalProperties:
+ type: number
+ type: object
required:
- metrics
+ title: Aggregated final evaluation result.
type: object
- EvaluateSummarizationRequest:
+ EvaluateDatasetConfig:
additionalProperties: false
properties:
- metrics:
+ dataset_identifier:
+ type: string
+ kwargs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ row_limit:
+ type: integer
+ required:
+ - dataset_identifier
+ type: object
+ EvaluateJudgeScoringConfig:
+ type: object
+ EvaluateModelGenerationConfig:
+ additionalProperties: false
+ properties:
+ kwargs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ model:
+ type: string
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ required:
+ - model
+ - sampling_params
+ type: object
+ EvaluatePostprocessConfig:
+ additionalProperties: false
+ properties:
+ kwargs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: object
+ EvaluatePreprocessConfig:
+ additionalProperties: false
+ properties:
+ kwargs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: object
+ EvaluateProcessorConfig:
+ additionalProperties: false
+ properties:
+ postprocess_config:
+ $ref: '#/components/schemas/EvaluatePostprocessConfig'
+ preprocess_config:
+ $ref: '#/components/schemas/EvaluatePreprocessConfig'
+ processor_identifier:
+ type: string
+ required:
+ - processor_identifier
+ type: object
+ EvaluateResponse:
+ additionalProperties: false
+ properties:
+ eval_result:
+ $ref: '#/components/schemas/EvalResult'
+ formatted_report:
+ type: string
+ required:
+ - eval_result
+ title: Scores for evaluation.
+ type: object
+ EvaluateScoringConfig:
+ additionalProperties: false
+ properties:
+ scorer_config_list:
items:
- enum:
- - rouge
- - bleu
- type: string
+ $ref: '#/components/schemas/EvaluateSingleScorerConfig'
type: array
required:
- - metrics
+ - scorer_config_list
type: object
- EvaluateTextGenerationRequest:
+ EvaluateSingleScorerConfig:
additionalProperties: false
properties:
- metrics:
- items:
- enum:
- - perplexity
- - rouge
- - bleu
- type: string
- type: array
- required:
- - metrics
- type: object
- EvaluationJob:
- additionalProperties: false
- properties:
- job_uuid:
+ llm_judge_config:
+ $ref: '#/components/schemas/LLMJudgeConfig'
+ scorer_name:
type: string
required:
- - job_uuid
+ - scorer_name
type: object
- EvaluationJobArtifactsResponse:
+ EvaluateTaskConfig:
additionalProperties: false
properties:
- job_uuid:
- type: string
+ dataset_config:
+ $ref: '#/components/schemas/EvaluateDatasetConfig'
+ generation_config:
+ $ref: '#/components/schemas/EvaluateModelGenerationConfig'
+ processor_config:
+ $ref: '#/components/schemas/EvaluateProcessorConfig'
+ scoring_config:
+ $ref: '#/components/schemas/EvaluateScoringConfig'
required:
- - job_uuid
- title: Artifacts of a evaluation job.
- type: object
- EvaluationJobLogStream:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- type: object
- EvaluationJobStatusResponse:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
+ - dataset_config
+ - processor_config
+ - generation_config
+ - scoring_config
type: object
FinetuningAlgorithm:
enum:
@@ -845,6 +951,39 @@ components:
required:
- status
type: object
+ HuggingfaceDatasetDef:
+ additionalProperties: false
+ properties:
+ dataset_name:
+ type: string
+ dataset_path:
+ type: string
+ identifier:
+ type: string
+ kwargs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ rename_columns_map:
+ additionalProperties:
+ type: string
+ type: object
+ type:
+ const: huggingface
+ default: huggingface
+ type: string
+ required:
+ - type
+ - identifier
+ - dataset_path
+ - kwargs
+ type: object
ImageMedia:
additionalProperties: false
properties:
@@ -936,6 +1075,20 @@ components:
- provider_id
- type
type: object
+ LLMJudgeConfig:
+ additionalProperties: false
+ properties:
+ judge_model_generation_config:
+ $ref: '#/components/schemas/EvaluateModelGenerationConfig'
+ judge_processor_config:
+ $ref: '#/components/schemas/EvaluateProcessorConfig'
+ judge_scoring_config:
+ $ref: '#/components/schemas/EvaluateJudgeScoringConfig'
+ required:
+ - judge_processor_config
+ - judge_model_generation_config
+ - judge_scoring_config
+ type: object
LogEventRequest:
additionalProperties: false
properties:
@@ -1629,6 +1782,32 @@ components:
- method
- provider_types
type: object
+ RunEvalTaskRequest:
+ additionalProperties: false
+ properties:
+ dataset:
+ type: string
+ eval_task_config:
+ $ref: '#/components/schemas/EvaluateTaskConfig'
+ model:
+ type: string
+ task:
+ type: string
+ required:
+ - model
+ - task
+ type: object
+ RunScorerRequest:
+ additionalProperties: false
+ properties:
+ dataset_config:
+ $ref: '#/components/schemas/EvaluateDatasetConfig'
+ eval_scoring_config:
+ $ref: '#/components/schemas/EvaluateScoringConfig'
+ required:
+ - dataset_config
+ - eval_scoring_config
+ type: object
RunShieldRequest:
additionalProperties: false
properties:
@@ -2507,7 +2686,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-10-09 21:10:09.073430"
+ \ draft and subject to change.\n Generated at 2024-10-15 00:44:26.278642"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2693,7 +2872,7 @@ paths:
responses:
'200':
content:
- application/json:
+ text/event-stream:
schema:
$ref: '#/components/schemas/AgentTurnResponseStreamChunk'
description: OK
@@ -2796,6 +2975,10 @@ paths:
required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateDatasetResponse'
description: OK
tags:
- Datasets
@@ -2817,6 +3000,10 @@ paths:
required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/DeleteDatasetResponse'
description: OK
tags:
- Datasets
@@ -2824,7 +3011,7 @@ paths:
get:
parameters:
- in: query
- name: dataset_uuid
+ name: dataset_identifier
required: true
schema:
type: string
@@ -2840,104 +3027,15 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/TrainEvalDataset'
+ oneOf:
+ - oneOf:
+ - $ref: '#/components/schemas/HuggingfaceDatasetDef'
+ - $ref: '#/components/schemas/CustomDatasetDef'
+ - type: 'null'
description: OK
tags:
- Datasets
- /evaluate/job/artifacts:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobArtifactsResponse'
- description: OK
- tags:
- - Evaluations
- /evaluate/job/cancel:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/CancelEvaluationJobRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - Evaluations
- /evaluate/job/logs:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobLogStream'
- description: OK
- tags:
- - Evaluations
- /evaluate/job/status:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobStatusResponse'
- description: OK
- tags:
- - Evaluations
- /evaluate/jobs:
+ /datasets/list:
get:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2952,11 +3050,13 @@ paths:
content:
application/jsonl:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ oneOf:
+ - $ref: '#/components/schemas/HuggingfaceDatasetDef'
+ - $ref: '#/components/schemas/CustomDatasetDef'
description: OK
tags:
- - Evaluations
- /evaluate/question_answering/:
+ - Datasets
+ /evals/run_eval_task:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2970,18 +3070,18 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest'
+ $ref: '#/components/schemas/RunEvalTaskRequest'
required: true
responses:
'200':
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ $ref: '#/components/schemas/EvaluateResponse'
description: OK
tags:
- - Evaluations
- /evaluate/summarization/:
+ - Evals
+ /evals/run_scorer:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2995,42 +3095,17 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateSummarizationRequest'
+ $ref: '#/components/schemas/RunScorerRequest'
required: true
responses:
'200':
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ $ref: '#/components/schemas/EvaluateResponse'
description: OK
tags:
- - Evaluations
- /evaluate/text_generation/:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluateTextGenerationRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJob'
- description: OK
- tags:
- - Evaluations
+ - Evals
/health:
get:
parameters:
@@ -3712,20 +3787,20 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: RewardScoring
+- name: Models
+- name: BatchInference
+- name: Inspect
+- name: Evals
+- name: Safety
+- name: Shields
+- name: Telemetry
+- name: Agents
- name: Memory
- name: SyntheticDataGeneration
-- name: Models
-- name: Safety
-- name: BatchInference
-- name: Agents
-- name: MemoryBanks
-- name: Shields
-- name: Datasets
-- name: Evaluations
-- name: Inspect
- name: PostTraining
-- name: Telemetry
+- name: Datasets
+- name: MemoryBanks
+- name: RewardScoring
- name: Inference
- description:
name: BuiltinTool
@@ -3782,9 +3857,6 @@ tags:
- description:
name: BatchCompletionResponse
-- description:
- name: CancelEvaluationJobRequest
- description:
name: CancelTrainingJobRequest
@@ -3919,17 +3991,18 @@ tags:
name: Turn
- description:
name: ViolationLevel
-- description: 'Dataset to be used for training or evaluating language models.
-
-
- '
- name: TrainEvalDataset
-- description:
- name: TrainEvalDatasetColumnType
+ name: CustomDatasetDef
+- description:
+ name: HuggingfaceDatasetDef
- description:
name: CreateDatasetRequest
+- description:
+ name: CreateDatasetResponse
- description:
name: DeleteAgentsRequest
@@ -3939,23 +4012,15 @@ tags:
- description:
name: DeleteDatasetRequest
+- description:
+ name: DeleteDatasetResponse
- description:
name: EmbeddingsRequest
- description:
name: EmbeddingsResponse
-- description:
- name: EvaluateQuestionAnsweringRequest
-- description:
- name: EvaluationJob
-- description:
- name: EvaluateSummarizationRequest
-- description:
- name: EvaluateTextGenerationRequest
- description:
name: GetAgentsSessionRequest
@@ -3979,18 +4044,6 @@ tags:
- description:
name: AgentStepResponse
-- description: 'Artifacts of a evaluation job.
-
-
- '
- name: EvaluationJobArtifactsResponse
-- description:
- name: EvaluationJobLogStream
-- description:
- name: EvaluationJobStatusResponse
- description:
name: ModelDefWithProvider
@@ -4067,6 +4120,14 @@ tags:
name: OptimizerConfig
- description:
name: RLHFAlgorithm
+- description: 'Dataset to be used for training or evaluating language models.
+
+
+ '
+ name: TrainEvalDataset
+- description:
+ name: TrainEvalDatasetColumnType
- description:
name: TrainingConfig
- description:
name: ScoredMessage
+- description:
+ name: EvaluateDatasetConfig
+- description:
+ name: EvaluateJudgeScoringConfig
+- description:
+ name: EvaluateModelGenerationConfig
+- description:
+ name: EvaluatePostprocessConfig
+- description:
+ name: EvaluatePreprocessConfig
+- description:
+ name: EvaluateProcessorConfig
+- description:
+ name: EvaluateScoringConfig
+- description:
+ name: EvaluateSingleScorerConfig
+- description:
+ name: EvaluateTaskConfig
+- description:
+ name: LLMJudgeConfig
+- description:
+ name: RunEvalTaskRequest
+- description: 'Aggregated final evaluation result.
+
+
+ '
+ name: EvalResult
+- description: 'Scores for evaluation.
+
+
+ '
+ name: EvaluateResponse
+- description:
+ name: RunScorerRequest
- description:
name: RunShieldRequest
@@ -4141,7 +4247,7 @@ x-tagGroups:
- Agents
- BatchInference
- Datasets
- - Evaluations
+ - Evals
- Inference
- Inspect
- Memory
@@ -4172,7 +4278,6 @@ x-tagGroups:
- BatchCompletionRequest
- BatchCompletionResponse
- BuiltinTool
- - CancelEvaluationJobRequest
- CancelTrainingJobRequest
- ChatCompletionRequest
- ChatCompletionResponse
@@ -4189,31 +4294,40 @@ x-tagGroups:
- CreateAgentSessionRequest
- CreateAgentTurnRequest
- CreateDatasetRequest
+ - CreateDatasetResponse
+ - CustomDatasetDef
- DPOAlignmentConfig
- DeleteAgentsRequest
- DeleteAgentsSessionRequest
- DeleteDatasetRequest
+ - DeleteDatasetResponse
- DialogGenerations
- DoraFinetuningConfig
- EmbeddingsRequest
- EmbeddingsResponse
- - EvaluateQuestionAnsweringRequest
- - EvaluateSummarizationRequest
- - EvaluateTextGenerationRequest
- - EvaluationJob
- - EvaluationJobArtifactsResponse
- - EvaluationJobLogStream
- - EvaluationJobStatusResponse
+ - EvalResult
+ - EvaluateDatasetConfig
+ - EvaluateJudgeScoringConfig
+ - EvaluateModelGenerationConfig
+ - EvaluatePostprocessConfig
+ - EvaluatePreprocessConfig
+ - EvaluateProcessorConfig
+ - EvaluateResponse
+ - EvaluateScoringConfig
+ - EvaluateSingleScorerConfig
+ - EvaluateTaskConfig
- FinetuningAlgorithm
- FunctionCallToolDefinition
- GetAgentsSessionRequest
- GraphMemoryBankDef
- HealthInfo
+ - HuggingfaceDatasetDef
- ImageMedia
- InferenceStep
- InsertDocumentsRequest
- KeyValueMemoryBankDef
- KeywordMemoryBankDef
+ - LLMJudgeConfig
- LogEventRequest
- LogSeverity
- LoraFinetuningConfig
@@ -4243,6 +4357,8 @@ x-tagGroups:
- RewardScoreRequest
- RewardScoringResponse
- RouteInfo
+ - RunEvalTaskRequest
+ - RunScorerRequest
- RunShieldRequest
- RunShieldResponse
- SafetyViolation
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index c0aa4d161..f5991c52e 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -15,6 +15,26 @@ from pydantic import BaseModel, Field
from typing_extensions import Annotated
+@json_schema_type
+class TrainEvalDatasetColumnType(Enum):
+ dialog = "dialog"
+ text = "text"
+ media = "media"
+ number = "number"
+ json = "json"
+
+
+@json_schema_type
+class TrainEvalDataset(BaseModel):
+ """Dataset to be used for training or evaluating language models."""
+
+ # TODO(ashwin): figure out if we need to add an enum for a "dataset type"
+
+ columns: Dict[str, TrainEvalDatasetColumnType]
+ content_url: URL
+ metadata: Optional[Dict[str, Any]] = None
+
+
@json_schema_type
class GenerationInput(BaseModel):
messages: List[Message]
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index d943f48b2..cdfe5c467 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -14,7 +14,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field
from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_stack.apis.dataset import * # noqa: F403
+from llama_stack.apis.datasets import * # noqa: F403
from llama_stack.apis.common.training_types import * # noqa: F403