diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 871c01a80..f9f56119b 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -33,14 +33,16 @@ schema_utils.json_schema_type = json_schema_type
from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_stack.apis.agents import * # noqa: F403
-from llama_stack.apis.dataset import * # noqa: F403
-from llama_stack.apis.evals import * # noqa: F403
+from llama_stack.apis.datasets import * # noqa: F403
+from llama_stack.apis.datasetio import * # noqa: F403
+from llama_stack.apis.scoring import * # noqa: F403
+from llama_stack.apis.scoring_functions import * # noqa: F403
+from llama_stack.apis.eval import * # noqa: F403
from llama_stack.apis.inference import * # noqa: F403
from llama_stack.apis.batch_inference import * # noqa: F403
from llama_stack.apis.memory import * # noqa: F403
from llama_stack.apis.telemetry import * # noqa: F403
from llama_stack.apis.post_training import * # noqa: F403
-from llama_stack.apis.reward_scoring import * # noqa: F403
from llama_stack.apis.synthetic_data_generation import * # noqa: F403
from llama_stack.apis.safety import * # noqa: F403
from llama_stack.apis.models import * # noqa: F403
@@ -54,14 +56,16 @@ class LlamaStack(
Inference,
BatchInference,
Agents,
- RewardScoring,
Safety,
SyntheticDataGeneration,
Datasets,
Telemetry,
PostTraining,
Memory,
- Evaluations,
+ Eval,
+ Scoring,
+ ScoringFunctions,
+ DatasetIO,
Models,
Shields,
Inspect,
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 8e6683931..886634fba 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-18 20:48:17.730988"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-24 17:40:59.576117"
},
"servers": [
{
@@ -109,39 +109,6 @@
}
}
},
- "/evaluate/job/cancel": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/CancelEvaluationJobRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/post_training/job/cancel": {
"post": {
"responses": {
@@ -389,39 +356,6 @@
}
}
},
- "/datasets/create": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Datasets"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/CreateDatasetRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/agents/delete": {
"post": {
"responses": {
@@ -488,39 +422,6 @@
}
}
},
- "/datasets/delete": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "Datasets"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/DeleteDatasetRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/inference/embeddings": {
"post": {
"responses": {
@@ -561,7 +462,7 @@
}
}
},
- "/evaluate/question_answering/": {
+ "/eval/evaluate": {
"post": {
"responses": {
"200": {
@@ -569,14 +470,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/EvaluationJob"
+ "$ref": "#/components/schemas/EvaluateResponse"
}
}
}
}
},
"tags": [
- "Evaluations"
+ "Eval"
],
"parameters": [
{
@@ -593,7 +494,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest"
+ "$ref": "#/components/schemas/EvaluateRequest"
}
}
},
@@ -601,7 +502,7 @@
}
}
},
- "/evaluate/summarization/": {
+ "/eval/evaluate_batch": {
"post": {
"responses": {
"200": {
@@ -609,14 +510,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/EvaluationJob"
+ "$ref": "#/components/schemas/Job"
}
}
}
}
},
"tags": [
- "Evaluations"
+ "Eval"
],
"parameters": [
{
@@ -633,47 +534,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/EvaluateSummarizationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/text_generation/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateTextGenerationRequest"
+ "$ref": "#/components/schemas/EvaluateBatchRequest"
}
}
},
@@ -763,6 +624,14 @@
"type": "string"
}
},
+ {
+ "name": "session_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
{
"name": "turn_id",
"in": "query",
@@ -817,6 +686,14 @@
"type": "string"
}
},
+ {
+ "name": "session_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
{
"name": "turn_id",
"in": "query",
@@ -845,7 +722,14 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/DatasetDefWithProvider"
+ },
+ {
+ "type": "null"
+ }
+ ]
}
}
}
@@ -856,7 +740,7 @@
],
"parameters": [
{
- "name": "dataset_uuid",
+ "name": "dataset_identifier",
"in": "query",
"required": true,
"schema": {
@@ -875,150 +759,6 @@
]
}
},
- "/evaluate/job/artifacts": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobArtifactsResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/job/logs": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobLogStream"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/job/status": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJobStatusResponse"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "job_uuid",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
- "/evaluate/jobs": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/jsonl": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ]
- }
- },
"/memory_banks/get": {
"get": {
"responses": {
@@ -1122,6 +862,113 @@
]
}
},
+ "/datasetio/get_rows_paginated": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/PaginatedRowsResult"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "DatasetIO"
+ ],
+ "parameters": [
+ {
+ "name": "dataset_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "rows_in_page",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "integer"
+ }
+ },
+ {
+ "name": "page_token",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "filter_condition",
+ "in": "query",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "/scoring_functions/get": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "ScoringFunctions"
+ ],
+ "parameters": [
+ {
+ "name": "name",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/shields/get": {
"get": {
"responses": {
@@ -1412,6 +1259,152 @@
}
}
},
+ "/eval/job/cancel": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/JobCancelRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/eval/job/result": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "/eval/job/status": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/JobStatus"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "parameters": [
+ {
+ "name": "job_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
+ "/datasets/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/DatasetDefWithProvider"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Datasets"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/memory_banks/list": {
"get": {
"responses": {
@@ -1554,6 +1547,36 @@
]
}
},
+ "/scoring_functions/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "ScoringFunctions"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/shields/list": {
"get": {
"responses": {
@@ -1697,6 +1720,39 @@
}
}
},
+ "/datasets/register": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "Datasets"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RegisterDatasetRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/memory_banks/register": {
"post": {
"responses": {
@@ -1763,6 +1819,39 @@
}
}
},
+ "/scoring_functions/register": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "ScoringFunctions"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RegisterScoringFunctionRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/shields/register": {
"post": {
"responses": {
@@ -1796,46 +1885,6 @@
}
}
},
- "/reward_scoring/score": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RewardScoringResponse"
- }
- }
- }
- }
- },
- "tags": [
- "RewardScoring"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/RewardScoreRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/safety/run_shield": {
"post": {
"responses": {
@@ -1876,6 +1925,86 @@
}
}
},
+ "/scoring/score": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoreResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoreRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/scoring/score_batch": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoreBatchResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Scoring"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/ScoreBatchRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/post_training/supervised_fine_tune": {
"post": {
"responses": {
@@ -2571,18 +2700,6 @@
"completion_message_batch"
]
},
- "CancelEvaluationJobRequest": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
"CancelTrainingJobRequest": {
"type": "object",
"properties": {
@@ -2635,6 +2752,90 @@
"tool_prompt_format": {
"$ref": "#/components/schemas/ToolPromptFormat"
},
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json_schema",
+ "default": "json_schema"
+ },
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "schema"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "grammar",
+ "default": "grammar"
+ },
+ "bnf": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "bnf"
+ ]
+ }
+ ]
+ },
"stream": {
"type": "boolean"
},
@@ -2807,6 +3008,90 @@
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams"
},
+ "response_format": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json_schema",
+ "default": "json_schema"
+ },
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "schema"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "grammar",
+ "default": "grammar"
+ },
+ "bnf": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "bnf"
+ ]
+ }
+ ]
+ },
"stream": {
"type": "boolean"
},
@@ -4094,77 +4379,6 @@
"error"
]
},
- "TrainEvalDataset": {
- "type": "object",
- "properties": {
- "columns": {
- "type": "object",
- "additionalProperties": {
- "$ref": "#/components/schemas/TrainEvalDatasetColumnType"
- }
- },
- "content_url": {
- "$ref": "#/components/schemas/URL"
- },
- "metadata": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "columns",
- "content_url"
- ],
- "title": "Dataset to be used for training or evaluating language models."
- },
- "TrainEvalDatasetColumnType": {
- "type": "string",
- "enum": [
- "dialog",
- "text",
- "media",
- "number",
- "json"
- ]
- },
- "CreateDatasetRequest": {
- "type": "object",
- "properties": {
- "uuid": {
- "type": "string"
- },
- "dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
- }
- },
- "additionalProperties": false,
- "required": [
- "uuid",
- "dataset"
- ]
- },
"DeleteAgentsRequest": {
"type": "object",
"properties": {
@@ -4193,18 +4407,6 @@
"session_id"
]
},
- "DeleteDatasetRequest": {
- "type": "object",
- "properties": {
- "dataset_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "dataset_uuid"
- ]
- },
"EmbeddingsRequest": {
"type": "object",
"properties": {
@@ -4262,74 +4464,251 @@
"embeddings"
]
},
- "EvaluateQuestionAnsweringRequest": {
+ "AgentCandidate": {
"type": "object",
"properties": {
- "metrics": {
+ "type": {
+ "type": "string",
+ "const": "agent",
+ "default": "agent"
+ },
+ "config": {
+ "$ref": "#/components/schemas/AgentConfig"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "config"
+ ]
+ },
+ "ModelCandidate": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "model",
+ "default": "model"
+ },
+ "model": {
+ "type": "string"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "system_message": {
+ "$ref": "#/components/schemas/SystemMessage"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "model",
+ "sampling_params"
+ ]
+ },
+ "EvaluateRequest": {
+ "type": "object",
+ "properties": {
+ "input_rows": {
"type": "array",
"items": {
- "type": "string",
- "enum": [
- "em",
- "f1"
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "candidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ]
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "input_rows",
+ "candidate",
+ "scoring_functions"
+ ]
+ },
+ "EvaluateResponse": {
+ "type": "object",
+ "properties": {
+ "generations": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "scores": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ScoringResult"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "generations",
+ "scores"
+ ]
+ },
+ "ScoringResult": {
+ "type": "object",
+ "properties": {
+ "score_rows": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "aggregated_results": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
]
}
}
},
"additionalProperties": false,
"required": [
- "metrics"
+ "score_rows",
+ "aggregated_results"
]
},
- "EvaluationJob": {
+ "EvaluateBatchRequest": {
"type": "object",
"properties": {
- "job_uuid": {
+ "dataset_id": {
+ "type": "string"
+ },
+ "candidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ]
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_id",
+ "candidate",
+ "scoring_functions"
+ ]
+ },
+ "Job": {
+ "type": "object",
+ "properties": {
+ "job_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "job_uuid"
- ]
- },
- "EvaluateSummarizationRequest": {
- "type": "object",
- "properties": {
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "metrics"
- ]
- },
- "EvaluateTextGenerationRequest": {
- "type": "object",
- "properties": {
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "perplexity",
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "metrics"
+ "job_id"
]
},
"GetAgentsSessionRequest": {
@@ -4517,41 +4896,216 @@
"step"
]
},
- "EvaluationJobArtifactsResponse": {
+ "DatasetDefWithProvider": {
"type": "object",
"properties": {
- "job_uuid": {
+ "identifier": {
+ "type": "string"
+ },
+ "dataset_schema": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "string",
+ "default": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "number",
+ "default": "number"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "boolean",
+ "default": "boolean"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "array",
+ "default": "array"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "object",
+ "default": "object"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json",
+ "default": "json"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "union",
+ "default": "union"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "custom",
+ "default": "custom"
+ },
+ "validator_class": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "validator_class"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "chat_completion_input",
+ "default": "chat_completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "completion_input",
+ "default": "completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "agent_turn_input",
+ "default": "agent_turn_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ }
+ ]
+ }
+ },
+ "url": {
+ "$ref": "#/components/schemas/URL"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "provider_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
- "job_uuid"
- ],
- "title": "Artifacts of a evaluation job."
- },
- "EvaluationJobLogStream": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
- "EvaluationJobStatusResponse": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
+ "identifier",
+ "dataset_schema",
+ "url",
+ "metadata",
+ "provider_id"
]
},
"ModelDefWithProvider": {
@@ -4600,6 +5154,458 @@
"provider_id"
]
},
+ "PaginatedRowsResult": {
+ "type": "object",
+ "properties": {
+ "rows": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "total_count": {
+ "type": "integer"
+ },
+ "next_page_token": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "rows",
+ "total_count"
+ ]
+ },
+ "Parameter": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string"
+ },
+ "type": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "string",
+ "default": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "number",
+ "default": "number"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "boolean",
+ "default": "boolean"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "array",
+ "default": "array"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "object",
+ "default": "object"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json",
+ "default": "json"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "union",
+ "default": "union"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "custom",
+ "default": "custom"
+ },
+ "validator_class": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "validator_class"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "chat_completion_input",
+ "default": "chat_completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "completion_input",
+ "default": "completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "agent_turn_input",
+ "default": "agent_turn_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ }
+ ]
+ },
+ "description": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "name",
+ "type"
+ ]
+ },
+ "ScoringFunctionDefWithProvider": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "string"
+ },
+ "description": {
+ "type": "string"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "parameters": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/Parameter"
+ }
+ },
+ "return_type": {
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "string",
+ "default": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "number",
+ "default": "number"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "boolean",
+ "default": "boolean"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "array",
+ "default": "array"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "object",
+ "default": "object"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "json",
+ "default": "json"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "union",
+ "default": "union"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "custom",
+ "default": "custom"
+ },
+ "validator_class": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "validator_class"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "chat_completion_input",
+ "default": "chat_completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "completion_input",
+ "default": "completion_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "agent_turn_input",
+ "default": "agent_turn_input"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ }
+ ]
+ },
+ "context": {
+ "type": "object",
+ "properties": {
+ "judge_model": {
+ "type": "string"
+ },
+ "prompt_template": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "judge_model"
+ ]
+ },
+ "provider_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "identifier",
+ "metadata",
+ "parameters",
+ "return_type",
+ "provider_id"
+ ]
+ },
"ShieldDefWithProvider": {
"type": "object",
"properties": {
@@ -4898,6 +5904,25 @@
"documents"
]
},
+ "JobCancelRequest": {
+ "type": "object",
+ "properties": {
+ "job_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_id"
+ ]
+ },
+ "JobStatus": {
+ "type": "string",
+ "enum": [
+ "completed",
+ "in_progress"
+ ]
+ },
"ProviderInfo": {
"type": "object",
"properties": {
@@ -5315,10 +6340,10 @@
"$ref": "#/components/schemas/URL"
},
"dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "type": "string"
},
"validation_dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "type": "string"
},
"algorithm": {
"$ref": "#/components/schemas/RLHFAlgorithm"
@@ -5517,6 +6542,18 @@
"scores"
]
},
+ "RegisterDatasetRequest": {
+ "type": "object",
+ "properties": {
+ "dataset_def": {
+ "$ref": "#/components/schemas/DatasetDefWithProvider"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_def"
+ ]
+ },
"RegisterMemoryBankRequest": {
"type": "object",
"properties": {
@@ -5554,6 +6591,18 @@
"model"
]
},
+ "RegisterScoringFunctionRequest": {
+ "type": "object",
+ "properties": {
+ "function_def": {
+ "$ref": "#/components/schemas/ScoringFunctionDefWithProvider"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "function_def"
+ ]
+ },
"RegisterShieldRequest": {
"type": "object",
"properties": {
@@ -5566,153 +6615,6 @@
"shield"
]
},
- "DialogGenerations": {
- "type": "object",
- "properties": {
- "dialog": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- },
- "sampled_generations": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "dialog",
- "sampled_generations"
- ]
- },
- "RewardScoreRequest": {
- "type": "object",
- "properties": {
- "dialog_generations": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/DialogGenerations"
- }
- },
- "model": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "dialog_generations",
- "model"
- ]
- },
- "RewardScoringResponse": {
- "type": "object",
- "properties": {
- "scored_generations": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ScoredDialogGenerations"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "scored_generations"
- ],
- "title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
- },
- "ScoredDialogGenerations": {
- "type": "object",
- "properties": {
- "dialog": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- },
- "scored_generations": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ScoredMessage"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "dialog",
- "scored_generations"
- ]
- },
- "ScoredMessage": {
- "type": "object",
- "properties": {
- "message": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- },
- "score": {
- "type": "number"
- }
- },
- "additionalProperties": false,
- "required": [
- "message",
- "score"
- ]
- },
"RunShieldRequest": {
"type": "object",
"properties": {
@@ -5780,6 +6682,106 @@
},
"additionalProperties": false
},
+ "ScoreRequest": {
+ "type": "object",
+ "properties": {
+ "input_rows": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "input_rows",
+ "scoring_functions"
+ ]
+ },
+ "ScoreResponse": {
+ "type": "object",
+ "properties": {
+ "results": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ScoringResult"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "results"
+ ]
+ },
+ "ScoreBatchRequest": {
+ "type": "object",
+ "properties": {
+ "dataset_id": {
+ "type": "string"
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "save_results_dataset": {
+ "type": "boolean"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dataset_id",
+ "scoring_functions",
+ "save_results_dataset"
+ ]
+ },
+ "ScoreBatchResponse": {
+ "type": "object",
+ "properties": {
+ "dataset_id": {
+ "type": "string"
+ },
+ "results": {
+ "type": "object",
+ "additionalProperties": {
+ "$ref": "#/components/schemas/ScoringResult"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "results"
+ ]
+ },
"DoraFinetuningConfig": {
"type": "object",
"properties": {
@@ -5892,10 +6894,10 @@
"type": "string"
},
"dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "type": "string"
},
"validation_dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
+ "type": "string"
},
"algorithm": {
"$ref": "#/components/schemas/FinetuningAlgorithm"
@@ -6034,7 +7036,29 @@
"synthetic_data": {
"type": "array",
"items": {
- "$ref": "#/components/schemas/ScoredDialogGenerations"
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
}
},
"statistics": {
@@ -6079,49 +7103,55 @@
],
"tags": [
{
- "name": "Models"
+ "name": "Eval"
},
{
- "name": "RewardScoring"
- },
- {
- "name": "MemoryBanks"
- },
- {
- "name": "Shields"
+ "name": "ScoringFunctions"
},
{
"name": "SyntheticDataGeneration"
},
- {
- "name": "Inference"
- },
{
"name": "Inspect"
},
- {
- "name": "BatchInference"
- },
- {
- "name": "Memory"
- },
- {
- "name": "Datasets"
- },
- {
- "name": "Agents"
- },
{
"name": "PostTraining"
},
{
- "name": "Telemetry"
+ "name": "Models"
},
{
"name": "Safety"
},
{
- "name": "Evaluations"
+ "name": "MemoryBanks"
+ },
+ {
+ "name": "DatasetIO"
+ },
+ {
+ "name": "Memory"
+ },
+ {
+ "name": "Scoring"
+ },
+ {
+ "name": "Shields"
+ },
+ {
+ "name": "Datasets"
+ },
+ {
+ "name": "Inference"
+ },
+ {
+ "name": "Telemetry"
+ },
+ {
+ "name": "BatchInference"
+ },
+ {
+ "name": "Agents"
},
{
"name": "BuiltinTool",
@@ -6199,10 +7229,6 @@
"name": "BatchCompletionResponse",
"description": ""
},
- {
- "name": "CancelEvaluationJobRequest",
- "description": ""
- },
{
"name": "CancelTrainingJobRequest",
"description": ""
@@ -6371,18 +7397,6 @@
"name": "ViolationLevel",
"description": ""
},
- {
- "name": "TrainEvalDataset",
- "description": "Dataset to be used for training or evaluating language models.\n\n"
- },
- {
- "name": "TrainEvalDatasetColumnType",
- "description": ""
- },
- {
- "name": "CreateDatasetRequest",
- "description": ""
- },
{
"name": "DeleteAgentsRequest",
"description": ""
@@ -6391,10 +7405,6 @@
"name": "DeleteAgentsSessionRequest",
"description": ""
},
- {
- "name": "DeleteDatasetRequest",
- "description": ""
- },
{
"name": "EmbeddingsRequest",
"description": ""
@@ -6404,20 +7414,32 @@
"description": ""
},
{
- "name": "EvaluateQuestionAnsweringRequest",
- "description": ""
+ "name": "AgentCandidate",
+ "description": ""
},
{
- "name": "EvaluationJob",
- "description": ""
+ "name": "ModelCandidate",
+ "description": ""
},
{
- "name": "EvaluateSummarizationRequest",
- "description": ""
+ "name": "EvaluateRequest",
+ "description": ""
},
{
- "name": "EvaluateTextGenerationRequest",
- "description": ""
+ "name": "EvaluateResponse",
+ "description": ""
+ },
+ {
+ "name": "ScoringResult",
+ "description": ""
+ },
+ {
+ "name": "EvaluateBatchRequest",
+ "description": ""
+ },
+ {
+ "name": "Job",
+ "description": ""
},
{
"name": "GetAgentsSessionRequest",
@@ -6448,21 +7470,25 @@
"description": ""
},
{
- "name": "EvaluationJobArtifactsResponse",
- "description": "Artifacts of a evaluation job.\n\n"
- },
- {
- "name": "EvaluationJobLogStream",
- "description": ""
- },
- {
- "name": "EvaluationJobStatusResponse",
- "description": ""
+ "name": "DatasetDefWithProvider",
+ "description": ""
},
{
"name": "ModelDefWithProvider",
"description": ""
},
+ {
+ "name": "PaginatedRowsResult",
+ "description": ""
+ },
+ {
+ "name": "Parameter",
+ "description": ""
+ },
+ {
+ "name": "ScoringFunctionDefWithProvider",
+ "description": ""
+ },
{
"name": "ShieldDefWithProvider",
"description": ""
@@ -6507,6 +7533,14 @@
"name": "InsertDocumentsRequest",
"description": ""
},
+ {
+ "name": "JobCancelRequest",
+ "description": ""
+ },
+ {
+ "name": "JobStatus",
+ "description": ""
+ },
{
"name": "ProviderInfo",
"description": ""
@@ -6575,6 +7609,10 @@
"name": "QueryDocumentsResponse",
"description": ""
},
+ {
+ "name": "RegisterDatasetRequest",
+ "description": ""
+ },
{
"name": "RegisterMemoryBankRequest",
"description": ""
@@ -6583,30 +7621,14 @@
"name": "RegisterModelRequest",
"description": ""
},
+ {
+ "name": "RegisterScoringFunctionRequest",
+ "description": ""
+ },
{
"name": "RegisterShieldRequest",
"description": ""
},
- {
- "name": "DialogGenerations",
- "description": ""
- },
- {
- "name": "RewardScoreRequest",
- "description": ""
- },
- {
- "name": "RewardScoringResponse",
- "description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n"
- },
- {
- "name": "ScoredDialogGenerations",
- "description": ""
- },
- {
- "name": "ScoredMessage",
- "description": ""
- },
{
"name": "RunShieldRequest",
"description": ""
@@ -6615,6 +7637,22 @@
"name": "RunShieldResponse",
"description": ""
},
+ {
+ "name": "ScoreRequest",
+ "description": ""
+ },
+ {
+ "name": "ScoreResponse",
+ "description": ""
+ },
+ {
+ "name": "ScoreBatchRequest",
+ "description": ""
+ },
+ {
+ "name": "ScoreBatchResponse",
+ "description": ""
+ },
{
"name": "DoraFinetuningConfig",
"description": ""
@@ -6650,16 +7688,18 @@
"tags": [
"Agents",
"BatchInference",
+ "DatasetIO",
"Datasets",
- "Evaluations",
+ "Eval",
"Inference",
"Inspect",
"Memory",
"MemoryBanks",
"Models",
"PostTraining",
- "RewardScoring",
"Safety",
+ "Scoring",
+ "ScoringFunctions",
"Shields",
"SyntheticDataGeneration",
"Telemetry"
@@ -6668,6 +7708,7 @@
{
"name": "Types",
"tags": [
+ "AgentCandidate",
"AgentConfig",
"AgentCreateResponse",
"AgentSessionCreateResponse",
@@ -6685,7 +7726,6 @@
"BatchCompletionRequest",
"BatchCompletionResponse",
"BuiltinTool",
- "CancelEvaluationJobRequest",
"CancelTrainingJobRequest",
"ChatCompletionRequest",
"ChatCompletionResponse",
@@ -6701,22 +7741,16 @@
"CreateAgentRequest",
"CreateAgentSessionRequest",
"CreateAgentTurnRequest",
- "CreateDatasetRequest",
"DPOAlignmentConfig",
+ "DatasetDefWithProvider",
"DeleteAgentsRequest",
"DeleteAgentsSessionRequest",
- "DeleteDatasetRequest",
- "DialogGenerations",
"DoraFinetuningConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
- "EvaluateQuestionAnsweringRequest",
- "EvaluateSummarizationRequest",
- "EvaluateTextGenerationRequest",
- "EvaluationJob",
- "EvaluationJobArtifactsResponse",
- "EvaluationJobLogStream",
- "EvaluationJobStatusResponse",
+ "EvaluateBatchRequest",
+ "EvaluateRequest",
+ "EvaluateResponse",
"FinetuningAlgorithm",
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
@@ -6725,6 +7759,9 @@
"ImageMedia",
"InferenceStep",
"InsertDocumentsRequest",
+ "Job",
+ "JobCancelRequest",
+ "JobStatus",
"KeyValueMemoryBankDef",
"KeywordMemoryBankDef",
"LogEventRequest",
@@ -6734,8 +7771,11 @@
"MemoryRetrievalStep",
"MemoryToolDefinition",
"MetricEvent",
+ "ModelCandidate",
"ModelDefWithProvider",
"OptimizerConfig",
+ "PaginatedRowsResult",
+ "Parameter",
"PhotogenToolDefinition",
"PostTrainingJob",
"PostTrainingJobArtifactsResponse",
@@ -6748,21 +7788,25 @@
"QueryDocumentsRequest",
"QueryDocumentsResponse",
"RLHFAlgorithm",
+ "RegisterDatasetRequest",
"RegisterMemoryBankRequest",
"RegisterModelRequest",
+ "RegisterScoringFunctionRequest",
"RegisterShieldRequest",
"RestAPIExecutionConfig",
"RestAPIMethod",
- "RewardScoreRequest",
- "RewardScoringResponse",
"RouteInfo",
"RunShieldRequest",
"RunShieldResponse",
"SafetyViolation",
"SamplingParams",
"SamplingStrategy",
- "ScoredDialogGenerations",
- "ScoredMessage",
+ "ScoreBatchRequest",
+ "ScoreBatchResponse",
+ "ScoreRequest",
+ "ScoreResponse",
+ "ScoringFunctionDefWithProvider",
+ "ScoringResult",
"SearchToolDefinition",
"Session",
"ShieldCallStep",
@@ -6788,8 +7832,6 @@
"ToolResponse",
"ToolResponseMessage",
"Trace",
- "TrainEvalDataset",
- "TrainEvalDatasetColumnType",
"TrainingConfig",
"Turn",
"URL",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 906d3934a..9dcdbb028 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1,6 +1,19 @@
components:
responses: {}
schemas:
+ AgentCandidate:
+ additionalProperties: false
+ properties:
+ config:
+ $ref: '#/components/schemas/AgentConfig'
+ type:
+ const: agent
+ default: agent
+ type: string
+ required:
+ - type
+ - config
+ type: object
AgentConfig:
additionalProperties: false
properties:
@@ -315,14 +328,6 @@ components:
- photogen
- code_interpreter
type: string
- CancelEvaluationJobRequest:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- type: object
CancelTrainingJobRequest:
additionalProperties: false
properties:
@@ -351,6 +356,48 @@ components:
type: array
model:
type: string
+ response_format:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ schema:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type:
+ const: json_schema
+ default: json_schema
+ type: string
+ required:
+ - type
+ - schema
+ type: object
+ - additionalProperties: false
+ properties:
+ bnf:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type:
+ const: grammar
+ default: grammar
+ type: string
+ required:
+ - type
+ - bnf
+ type: object
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
@@ -490,6 +537,48 @@ components:
type: object
model:
type: string
+ response_format:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ schema:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type:
+ const: json_schema
+ default: json_schema
+ type: string
+ required:
+ - type
+ - schema
+ type: object
+ - additionalProperties: false
+ properties:
+ bnf:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type:
+ const: grammar
+ default: grammar
+ type: string
+ required:
+ - type
+ - bnf
+ type: object
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
@@ -572,17 +661,6 @@ components:
- session_id
- messages
type: object
- CreateDatasetRequest:
- additionalProperties: false
- properties:
- dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
- uuid:
- type: string
- required:
- - uuid
- - dataset
- type: object
DPOAlignmentConfig:
additionalProperties: false
properties:
@@ -600,6 +678,138 @@ components:
- epsilon
- gamma
type: object
+ DatasetDefWithProvider:
+ additionalProperties: false
+ properties:
+ dataset_schema:
+ additionalProperties:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ type:
+ const: string
+ default: string
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: number
+ default: number
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: boolean
+ default: boolean
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: array
+ default: array
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: object
+ default: object
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: json
+ default: json
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: union
+ default: union
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: custom
+ default: custom
+ type: string
+ validator_class:
+ type: string
+ required:
+ - type
+ - validator_class
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: chat_completion_input
+ default: chat_completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: completion_input
+ default: completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: agent_turn_input
+ default: agent_turn_input
+ type: string
+ required:
+ - type
+ type: object
+ type: object
+ identifier:
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ provider_id:
+ type: string
+ url:
+ $ref: '#/components/schemas/URL'
+ required:
+ - identifier
+ - dataset_schema
+ - url
+ - metadata
+ - provider_id
+ type: object
DeleteAgentsRequest:
additionalProperties: false
properties:
@@ -619,37 +829,6 @@ components:
- agent_id
- session_id
type: object
- DeleteDatasetRequest:
- additionalProperties: false
- properties:
- dataset_uuid:
- type: string
- required:
- - dataset_uuid
- type: object
- DialogGenerations:
- additionalProperties: false
- properties:
- dialog:
- items:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- type: array
- sampled_generations:
- items:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- type: array
- required:
- - dialog
- - sampled_generations
- type: object
DoraFinetuningConfig:
additionalProperties: false
properties:
@@ -704,78 +883,74 @@ components:
required:
- embeddings
type: object
- EvaluateQuestionAnsweringRequest:
+ EvaluateBatchRequest:
additionalProperties: false
properties:
- metrics:
+ candidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ dataset_id:
+ type: string
+ scoring_functions:
items:
- enum:
- - em
- - f1
type: string
type: array
required:
- - metrics
+ - dataset_id
+ - candidate
+ - scoring_functions
type: object
- EvaluateSummarizationRequest:
+ EvaluateRequest:
additionalProperties: false
properties:
- metrics:
+ candidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ input_rows:
+ items:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: array
+ scoring_functions:
items:
- enum:
- - rouge
- - bleu
type: string
type: array
required:
- - metrics
+ - input_rows
+ - candidate
+ - scoring_functions
type: object
- EvaluateTextGenerationRequest:
+ EvaluateResponse:
additionalProperties: false
properties:
- metrics:
+ generations:
items:
- enum:
- - perplexity
- - rouge
- - bleu
- type: string
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
type: array
+ scores:
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringResult'
+ type: object
required:
- - metrics
- type: object
- EvaluationJob:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- type: object
- EvaluationJobArtifactsResponse:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- title: Artifacts of a evaluation job.
- type: object
- EvaluationJobLogStream:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
- type: object
- EvaluationJobStatusResponse:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- required:
- - job_uuid
+ - generations
+ - scores
type: object
FinetuningAlgorithm:
enum:
@@ -905,6 +1080,27 @@ components:
- bank_id
- documents
type: object
+ Job:
+ additionalProperties: false
+ properties:
+ job_id:
+ type: string
+ required:
+ - job_id
+ type: object
+ JobCancelRequest:
+ additionalProperties: false
+ properties:
+ job_id:
+ type: string
+ required:
+ - job_id
+ type: object
+ JobStatus:
+ enum:
+ - completed
+ - in_progress
+ type: string
KeyValueMemoryBankDef:
additionalProperties: false
properties:
@@ -1220,6 +1416,24 @@ components:
- value
- unit
type: object
+ ModelCandidate:
+ additionalProperties: false
+ properties:
+ model:
+ type: string
+ sampling_params:
+ $ref: '#/components/schemas/SamplingParams'
+ system_message:
+ $ref: '#/components/schemas/SystemMessage'
+ type:
+ const: model
+ default: model
+ type: string
+ required:
+ - type
+ - model
+ - sampling_params
+ type: object
ModelDefWithProvider:
additionalProperties: false
properties:
@@ -1266,6 +1480,144 @@ components:
- lr_min
- weight_decay
type: object
+ PaginatedRowsResult:
+ additionalProperties: false
+ properties:
+ next_page_token:
+ type: string
+ rows:
+ items:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: array
+ total_count:
+ type: integer
+ required:
+ - rows
+ - total_count
+ type: object
+ Parameter:
+ additionalProperties: false
+ properties:
+ description:
+ type: string
+ name:
+ type: string
+ type:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ type:
+ const: string
+ default: string
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: number
+ default: number
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: boolean
+ default: boolean
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: array
+ default: array
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: object
+ default: object
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: json
+ default: json
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: union
+ default: union
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: custom
+ default: custom
+ type: string
+ validator_class:
+ type: string
+ required:
+ - type
+ - validator_class
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: chat_completion_input
+ default: chat_completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: completion_input
+ default: completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: agent_turn_input
+ default: agent_turn_input
+ type: string
+ required:
+ - type
+ type: object
+ required:
+ - name
+ - type
+ type: object
PhotogenToolDefinition:
additionalProperties: false
properties:
@@ -1373,7 +1725,7 @@ components:
algorithm_config:
$ref: '#/components/schemas/DPOAlignmentConfig'
dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
+ type: string
finetuned_model:
$ref: '#/components/schemas/URL'
hyperparam_search_config:
@@ -1403,7 +1755,7 @@ components:
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
+ type: string
required:
- job_uuid
- finetuned_model
@@ -1515,6 +1867,14 @@ components:
enum:
- dpo
type: string
+ RegisterDatasetRequest:
+ additionalProperties: false
+ properties:
+ dataset_def:
+ $ref: '#/components/schemas/DatasetDefWithProvider'
+ required:
+ - dataset_def
+ type: object
RegisterMemoryBankRequest:
additionalProperties: false
properties:
@@ -1535,6 +1895,14 @@ components:
required:
- model
type: object
+ RegisterScoringFunctionRequest:
+ additionalProperties: false
+ properties:
+ function_def:
+ $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+ required:
+ - function_def
+ type: object
RegisterShieldRequest:
additionalProperties: false
properties:
@@ -1591,31 +1959,6 @@ components:
- PUT
- DELETE
type: string
- RewardScoreRequest:
- additionalProperties: false
- properties:
- dialog_generations:
- items:
- $ref: '#/components/schemas/DialogGenerations'
- type: array
- model:
- type: string
- required:
- - dialog_generations
- - model
- type: object
- RewardScoringResponse:
- additionalProperties: false
- properties:
- scored_generations:
- items:
- $ref: '#/components/schemas/ScoredDialogGenerations'
- type: array
- required:
- - scored_generations
- title: Response from the reward scoring. Batch of (prompt, response, score)
- tuples that pass the threshold.
- type: object
RouteInfo:
additionalProperties: false
properties:
@@ -1717,39 +2060,239 @@ components:
- top_p
- top_k
type: string
- ScoredDialogGenerations:
+ ScoreBatchRequest:
additionalProperties: false
properties:
- dialog:
+ dataset_id:
+ type: string
+ save_results_dataset:
+ type: boolean
+ scoring_functions:
items:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- type: array
- scored_generations:
- items:
- $ref: '#/components/schemas/ScoredMessage'
+ type: string
type: array
required:
- - dialog
- - scored_generations
+ - dataset_id
+ - scoring_functions
+ - save_results_dataset
type: object
- ScoredMessage:
+ ScoreBatchResponse:
additionalProperties: false
properties:
- message:
- oneOf:
- - $ref: '#/components/schemas/UserMessage'
- - $ref: '#/components/schemas/SystemMessage'
- - $ref: '#/components/schemas/ToolResponseMessage'
- - $ref: '#/components/schemas/CompletionMessage'
- score:
- type: number
+ dataset_id:
+ type: string
+ results:
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringResult'
+ type: object
required:
- - message
- - score
+ - results
+ type: object
+ ScoreRequest:
+ additionalProperties: false
+ properties:
+ input_rows:
+ items:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: array
+ scoring_functions:
+ items:
+ type: string
+ type: array
+ required:
+ - input_rows
+ - scoring_functions
+ type: object
+ ScoreResponse:
+ additionalProperties: false
+ properties:
+ results:
+ additionalProperties:
+ $ref: '#/components/schemas/ScoringResult'
+ type: object
+ required:
+ - results
+ type: object
+ ScoringFunctionDefWithProvider:
+ additionalProperties: false
+ properties:
+ context:
+ additionalProperties: false
+ properties:
+ judge_model:
+ type: string
+ prompt_template:
+ type: string
+ required:
+ - judge_model
+ type: object
+ description:
+ type: string
+ identifier:
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ parameters:
+ items:
+ $ref: '#/components/schemas/Parameter'
+ type: array
+ provider_id:
+ type: string
+ return_type:
+ oneOf:
+ - additionalProperties: false
+ properties:
+ type:
+ const: string
+ default: string
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: number
+ default: number
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: boolean
+ default: boolean
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: array
+ default: array
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: object
+ default: object
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: json
+ default: json
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: union
+ default: union
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: custom
+ default: custom
+ type: string
+ validator_class:
+ type: string
+ required:
+ - type
+ - validator_class
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: chat_completion_input
+ default: chat_completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: completion_input
+ default: completion_input
+ type: string
+ required:
+ - type
+ type: object
+ - additionalProperties: false
+ properties:
+ type:
+ const: agent_turn_input
+ default: agent_turn_input
+ type: string
+ required:
+ - type
+ type: object
+ required:
+ - identifier
+ - metadata
+ - parameters
+ - return_type
+ - provider_id
+ type: object
+ ScoringResult:
+ additionalProperties: false
+ properties:
+ aggregated_results:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ score_rows:
+ items:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: array
+ required:
+ - score_rows
+ - aggregated_results
type: object
SearchToolDefinition:
additionalProperties: false
@@ -1942,7 +2485,7 @@ components:
- $ref: '#/components/schemas/QLoraFinetuningConfig'
- $ref: '#/components/schemas/DoraFinetuningConfig'
dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
+ type: string
hyperparam_search_config:
additionalProperties:
oneOf:
@@ -1972,7 +2515,7 @@ components:
training_config:
$ref: '#/components/schemas/TrainingConfig'
validation_dataset:
- $ref: '#/components/schemas/TrainEvalDataset'
+ type: string
required:
- job_uuid
- model
@@ -2027,7 +2570,15 @@ components:
type: object
synthetic_data:
items:
- $ref: '#/components/schemas/ScoredDialogGenerations'
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
type: array
required:
- synthetic_data
@@ -2282,38 +2833,6 @@ components:
- root_span_id
- start_time
type: object
- TrainEvalDataset:
- additionalProperties: false
- properties:
- columns:
- additionalProperties:
- $ref: '#/components/schemas/TrainEvalDatasetColumnType'
- type: object
- content_url:
- $ref: '#/components/schemas/URL'
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- required:
- - columns
- - content_url
- title: Dataset to be used for training or evaluating language models.
- type: object
- TrainEvalDatasetColumnType:
- enum:
- - dialog
- - text
- - media
- - number
- - json
- type: string
TrainingConfig:
additionalProperties: false
properties:
@@ -2510,7 +3029,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-10-18 20:48:17.730988"
+ \ draft and subject to change.\n Generated at 2024-10-24 17:40:59.576117"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2651,6 +3170,11 @@ paths:
required: true
schema:
type: string
+ - in: query
+ name: session_id
+ required: true
+ schema:
+ type: string
- in: query
name: turn_id
required: true
@@ -2710,6 +3234,11 @@ paths:
required: true
schema:
type: string
+ - in: query
+ name: session_id
+ required: true
+ schema:
+ type: string
- in: query
name: turn_id
required: true
@@ -2781,9 +3310,29 @@ paths:
description: OK
tags:
- BatchInference
- /datasets/create:
- post:
+ /datasetio/get_rows_paginated:
+ get:
parameters:
+ - in: query
+ name: dataset_id
+ required: true
+ schema:
+ type: string
+ - in: query
+ name: rows_in_page
+ required: true
+ schema:
+ type: integer
+ - in: query
+ name: page_token
+ required: false
+ schema:
+ type: string
+ - in: query
+ name: filter_condition
+ required: false
+ schema:
+ type: string
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
@@ -2791,43 +3340,20 @@ paths:
required: false
schema:
type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/CreateDatasetRequest'
- required: true
responses:
'200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/PaginatedRowsResult'
description: OK
tags:
- - Datasets
- /datasets/delete:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/DeleteDatasetRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - Datasets
+ - DatasetIO
/datasets/get:
get:
parameters:
- in: query
- name: dataset_uuid
+ name: dataset_identifier
required: true
schema:
type: string
@@ -2843,104 +3369,13 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/TrainEvalDataset'
+ oneOf:
+ - $ref: '#/components/schemas/DatasetDefWithProvider'
+ - type: 'null'
description: OK
tags:
- Datasets
- /evaluate/job/artifacts:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobArtifactsResponse'
- description: OK
- tags:
- - Evaluations
- /evaluate/job/cancel:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/CancelEvaluationJobRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - Evaluations
- /evaluate/job/logs:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobLogStream'
- description: OK
- tags:
- - Evaluations
- /evaluate/job/status:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJobStatusResponse'
- description: OK
- tags:
- - Evaluations
- /evaluate/jobs:
+ /datasets/list:
get:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2955,11 +3390,11 @@ paths:
content:
application/jsonl:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ $ref: '#/components/schemas/DatasetDefWithProvider'
description: OK
tags:
- - Evaluations
- /evaluate/question_answering/:
+ - Datasets
+ /datasets/register:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2973,18 +3408,14 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest'
+ $ref: '#/components/schemas/RegisterDatasetRequest'
required: true
responses:
'200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluationJob'
description: OK
tags:
- - Evaluations
- /evaluate/summarization/:
+ - Datasets
+ /eval/evaluate:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -2998,18 +3429,18 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateSummarizationRequest'
+ $ref: '#/components/schemas/EvaluateRequest'
required: true
responses:
'200':
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ $ref: '#/components/schemas/EvaluateResponse'
description: OK
tags:
- - Evaluations
- /evaluate/text_generation/:
+ - Eval
+ /eval/evaluate_batch:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -3023,17 +3454,88 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateTextGenerationRequest'
+ $ref: '#/components/schemas/EvaluateBatchRequest'
required: true
responses:
'200':
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluationJob'
+ $ref: '#/components/schemas/Job'
description: OK
tags:
- - Evaluations
+ - Eval
+ /eval/job/cancel:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/JobCancelRequest'
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - Eval
+ /eval/job/result:
+ get:
+ parameters:
+ - in: query
+ name: job_id
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/EvaluateResponse'
+ description: OK
+ tags:
+ - Eval
+ /eval/job/status:
+ get:
+ parameters:
+ - in: query
+ name: job_id
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/JobStatus'
+ - type: 'null'
+ description: OK
+ tags:
+ - Eval
/health:
get:
parameters:
@@ -3501,31 +4003,6 @@ paths:
description: OK
tags:
- Inspect
- /reward_scoring/score:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RewardScoreRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RewardScoringResponse'
- description: OK
- tags:
- - RewardScoring
/routes/list:
get:
parameters:
@@ -3574,6 +4051,122 @@ paths:
description: OK
tags:
- Safety
+ /scoring/score:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoreRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoreResponse'
+ description: OK
+ tags:
+ - Scoring
+ /scoring/score_batch:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoreBatchRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ScoreBatchResponse'
+ description: OK
+ tags:
+ - Scoring
+ /scoring_functions/get:
+ get:
+ parameters:
+ - in: query
+ name: name
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+ - type: 'null'
+ description: OK
+ tags:
+ - ScoringFunctions
+ /scoring_functions/list:
+ get:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/ScoringFunctionDefWithProvider'
+ description: OK
+ tags:
+ - ScoringFunctions
+ /scoring_functions/register:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RegisterScoringFunctionRequest'
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - ScoringFunctions
/shields/get:
get:
parameters:
@@ -3715,21 +4308,23 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: Models
-- name: RewardScoring
-- name: MemoryBanks
-- name: Shields
+- name: Eval
+- name: ScoringFunctions
- name: SyntheticDataGeneration
-- name: Inference
- name: Inspect
-- name: BatchInference
-- name: Memory
-- name: Datasets
-- name: Agents
- name: PostTraining
-- name: Telemetry
+- name: Models
- name: Safety
-- name: Evaluations
+- name: MemoryBanks
+- name: DatasetIO
+- name: Memory
+- name: Scoring
+- name: Shields
+- name: Datasets
+- name: Inference
+- name: Telemetry
+- name: BatchInference
+- name: Agents
- description:
name: BuiltinTool
- description:
name: BatchCompletionResponse
-- description:
- name: CancelEvaluationJobRequest
- description:
name: CancelTrainingJobRequest
@@ -3922,43 +4514,35 @@ tags:
name: Turn
- description:
name: ViolationLevel
-- description: 'Dataset to be used for training or evaluating language models.
-
-
- '
- name: TrainEvalDataset
-- description:
- name: TrainEvalDatasetColumnType
-- description:
- name: CreateDatasetRequest
- description:
name: DeleteAgentsRequest
- description:
name: DeleteAgentsSessionRequest
-- description:
- name: DeleteDatasetRequest
- description:
name: EmbeddingsRequest
- description:
name: EmbeddingsResponse
-- description:
+ name: AgentCandidate
+- description:
+ name: ModelCandidate
+- description:
- name: EvaluateQuestionAnsweringRequest
-- description:
- name: EvaluationJob
-- description:
- name: EvaluateSummarizationRequest
-- description:
+ name: ScoringResult
+- description:
- name: EvaluateTextGenerationRequest
+ name: EvaluateBatchRequest
+- description:
+ name: Job
- description:
name: GetAgentsSessionRequest
@@ -3982,21 +4566,20 @@ tags:
- description:
name: AgentStepResponse
-- description: 'Artifacts of a evaluation job.
-
-
- '
- name: EvaluationJobArtifactsResponse
-- description:
- name: EvaluationJobLogStream
-- description:
- name: EvaluationJobStatusResponse
+ name: DatasetDefWithProvider
- description:
name: ModelDefWithProvider
+- description:
+ name: PaginatedRowsResult
+- description:
+ name: Parameter
+- description:
+ name: ScoringFunctionDefWithProvider
- description:
name: ShieldDefWithProvider
@@ -4038,6 +4621,11 @@ tags:
- description:
name: InsertDocumentsRequest
+- description:
+ name: JobCancelRequest
+- description:
+ name: JobStatus
- description:
name: ProviderInfo
- description:
@@ -4081,38 +4669,37 @@ tags:
- description:
name: QueryDocumentsResponse
+- description:
+ name: RegisterDatasetRequest
- description:
name: RegisterMemoryBankRequest
- description:
name: RegisterModelRequest
+- description:
+ name: RegisterScoringFunctionRequest
- description:
name: RegisterShieldRequest
-- description:
- name: DialogGenerations
-- description:
- name: RewardScoreRequest
-- description: 'Response from the reward scoring. Batch of (prompt, response, score)
- tuples that pass the threshold.
-
-
- '
- name: RewardScoringResponse
-- description:
- name: ScoredDialogGenerations
-- description:
- name: ScoredMessage
- description:
name: RunShieldRequest
- description:
name: RunShieldResponse
+- description:
+ name: ScoreRequest
+- description:
+ name: ScoreResponse
+- description:
+ name: ScoreBatchRequest
+- description:
+ name: ScoreBatchResponse
- description:
name: DoraFinetuningConfig
@@ -4143,21 +4730,24 @@ x-tagGroups:
tags:
- Agents
- BatchInference
+ - DatasetIO
- Datasets
- - Evaluations
+ - Eval
- Inference
- Inspect
- Memory
- MemoryBanks
- Models
- PostTraining
- - RewardScoring
- Safety
+ - Scoring
+ - ScoringFunctions
- Shields
- SyntheticDataGeneration
- Telemetry
- name: Types
tags:
+ - AgentCandidate
- AgentConfig
- AgentCreateResponse
- AgentSessionCreateResponse
@@ -4175,7 +4765,6 @@ x-tagGroups:
- BatchCompletionRequest
- BatchCompletionResponse
- BuiltinTool
- - CancelEvaluationJobRequest
- CancelTrainingJobRequest
- ChatCompletionRequest
- ChatCompletionResponse
@@ -4191,22 +4780,16 @@ x-tagGroups:
- CreateAgentRequest
- CreateAgentSessionRequest
- CreateAgentTurnRequest
- - CreateDatasetRequest
- DPOAlignmentConfig
+ - DatasetDefWithProvider
- DeleteAgentsRequest
- DeleteAgentsSessionRequest
- - DeleteDatasetRequest
- - DialogGenerations
- DoraFinetuningConfig
- EmbeddingsRequest
- EmbeddingsResponse
- - EvaluateQuestionAnsweringRequest
- - EvaluateSummarizationRequest
- - EvaluateTextGenerationRequest
- - EvaluationJob
- - EvaluationJobArtifactsResponse
- - EvaluationJobLogStream
- - EvaluationJobStatusResponse
+ - EvaluateBatchRequest
+ - EvaluateRequest
+ - EvaluateResponse
- FinetuningAlgorithm
- FunctionCallToolDefinition
- GetAgentsSessionRequest
@@ -4215,6 +4798,9 @@ x-tagGroups:
- ImageMedia
- InferenceStep
- InsertDocumentsRequest
+ - Job
+ - JobCancelRequest
+ - JobStatus
- KeyValueMemoryBankDef
- KeywordMemoryBankDef
- LogEventRequest
@@ -4224,8 +4810,11 @@ x-tagGroups:
- MemoryRetrievalStep
- MemoryToolDefinition
- MetricEvent
+ - ModelCandidate
- ModelDefWithProvider
- OptimizerConfig
+ - PaginatedRowsResult
+ - Parameter
- PhotogenToolDefinition
- PostTrainingJob
- PostTrainingJobArtifactsResponse
@@ -4238,21 +4827,25 @@ x-tagGroups:
- QueryDocumentsRequest
- QueryDocumentsResponse
- RLHFAlgorithm
+ - RegisterDatasetRequest
- RegisterMemoryBankRequest
- RegisterModelRequest
+ - RegisterScoringFunctionRequest
- RegisterShieldRequest
- RestAPIExecutionConfig
- RestAPIMethod
- - RewardScoreRequest
- - RewardScoringResponse
- RouteInfo
- RunShieldRequest
- RunShieldResponse
- SafetyViolation
- SamplingParams
- SamplingStrategy
- - ScoredDialogGenerations
- - ScoredMessage
+ - ScoreBatchRequest
+ - ScoreBatchResponse
+ - ScoreRequest
+ - ScoreResponse
+ - ScoringFunctionDefWithProvider
+ - ScoringResult
- SearchToolDefinition
- Session
- ShieldCallStep
@@ -4278,8 +4871,6 @@ x-tagGroups:
- ToolResponse
- ToolResponseMessage
- Trace
- - TrainEvalDataset
- - TrainEvalDatasetColumnType
- TrainingConfig
- Turn
- URL
diff --git a/llama_stack/apis/common/job_types.py b/llama_stack/apis/common/job_types.py
index ab203ebb8..ab8ab22dc 100644
--- a/llama_stack/apis/common/job_types.py
+++ b/llama_stack/apis/common/job_types.py
@@ -3,6 +3,8 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+from enum import Enum
+
from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel
@@ -10,3 +12,9 @@ from pydantic import BaseModel
@json_schema_type
class Job(BaseModel):
job_id: str
+
+
+@json_schema_type
+class JobStatus(Enum):
+ completed = "completed"
+ in_progress = "in_progress"
diff --git a/llama_stack/apis/common/type_system.py b/llama_stack/apis/common/type_system.py
index 35a26e9ef..93a3c0339 100644
--- a/llama_stack/apis/common/type_system.py
+++ b/llama_stack/apis/common/type_system.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from typing import Dict, List, Literal, Union
+from typing import Literal, Union
from pydantic import BaseModel, Field
from typing_extensions import Annotated
@@ -24,12 +24,10 @@ class BooleanType(BaseModel):
class ArrayType(BaseModel):
type: Literal["array"] = "array"
- items: "ParamType"
class ObjectType(BaseModel):
type: Literal["object"] = "object"
- properties: Dict[str, "ParamType"] = Field(default_factory=dict)
class JsonType(BaseModel):
@@ -38,12 +36,21 @@ class JsonType(BaseModel):
class UnionType(BaseModel):
type: Literal["union"] = "union"
- options: List["ParamType"] = Field(default_factory=list)
-class CustomType(BaseModel):
- type: Literal["custom"] = "custom"
- validator_class: str
+class ChatCompletionInputType(BaseModel):
+ # expects List[Message] for messages
+ type: Literal["chat_completion_input"] = "chat_completion_input"
+
+
+class CompletionInputType(BaseModel):
+ # expects InterleavedTextMedia for content
+ type: Literal["completion_input"] = "completion_input"
+
+
+class AgentTurnInputType(BaseModel):
+ # expects List[Message] for messages (may also include attachments?)
+ type: Literal["agent_turn_input"] = "agent_turn_input"
ParamType = Annotated[
@@ -55,11 +62,22 @@ ParamType = Annotated[
ObjectType,
JsonType,
UnionType,
- CustomType,
+ ChatCompletionInputType,
+ CompletionInputType,
+ AgentTurnInputType,
],
Field(discriminator="type"),
]
-ArrayType.model_rebuild()
-ObjectType.model_rebuild()
-UnionType.model_rebuild()
+# TODO: referencing ParamType recursively inside these container types
+# causes infinite recursion in the OpenAPI generation script.
+# Since we are going with ChatCompletionInputType and CompletionInputType,
+# we don't need to worry about ArrayType/ObjectType/UnionType for now.
+# ArrayType.model_rebuild()
+# ObjectType.model_rebuild()
+# UnionType.model_rebuild()
+
+
+# class CustomType(BaseModel):
+# type: Literal["custom"] = "custom"
+# validator_class: str
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py
index a97af1fc0..51f49da15 100644
--- a/llama_stack/apis/eval/eval.py
+++ b/llama_stack/apis/eval/eval.py
@@ -12,7 +12,7 @@ from llama_models.llama3.api.datatypes import * # noqa: F403
from llama_models.schema_utils import json_schema_type, webmethod
from llama_stack.apis.scoring_functions import * # noqa: F403
from llama_stack.apis.agents import AgentConfig
-from llama_stack.apis.common.job_types import Job
+from llama_stack.apis.common.job_types import Job, JobStatus
from llama_stack.apis.scoring import * # noqa: F403
@@ -40,7 +40,7 @@ class EvaluateResponse(BaseModel):
generations: List[Dict[str, Any]]
# each key in the dict is a scoring function name
- scores: List[Dict[str, ScoringResult]]
+ scores: Dict[str, ScoringResult]
class Eval(Protocol):
@@ -61,10 +61,10 @@ class Eval(Protocol):
) -> EvaluateResponse: ...
@webmethod(route="/eval/job/status", method="GET")
- async def job_status(self, job_id: str) -> None: ...
+ async def job_status(self, job_id: str) -> Optional[JobStatus]: ...
@webmethod(route="/eval/job/cancel", method="POST")
async def job_cancel(self, job_id: str) -> None: ...
@webmethod(route="/eval/job/result", method="GET")
- async def job_result(self, job_id: str) -> None: ...
+ async def job_result(self, job_id: str) -> EvaluateResponse: ...
diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py
index d943f48b2..eb4992cc6 100644
--- a/llama_stack/apis/post_training/post_training.py
+++ b/llama_stack/apis/post_training/post_training.py
@@ -14,7 +14,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel, Field
from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_stack.apis.dataset import * # noqa: F403
+from llama_stack.apis.datasets import * # noqa: F403
from llama_stack.apis.common.training_types import * # noqa: F403
@@ -107,8 +107,8 @@ class PostTrainingSFTRequest(BaseModel):
job_uuid: str
model: str
- dataset: TrainEvalDataset
- validation_dataset: TrainEvalDataset
+ dataset_id: str
+ validation_dataset_id: str
algorithm: FinetuningAlgorithm
algorithm_config: Union[
@@ -131,8 +131,8 @@ class PostTrainingRLHFRequest(BaseModel):
finetuned_model: URL
- dataset: TrainEvalDataset
- validation_dataset: TrainEvalDataset
+ dataset_id: str
+ validation_dataset_id: str
algorithm: RLHFAlgorithm
algorithm_config: Union[DPOAlignmentConfig]
@@ -181,8 +181,8 @@ class PostTraining(Protocol):
self,
job_uuid: str,
model: str,
- dataset: TrainEvalDataset,
- validation_dataset: TrainEvalDataset,
+ dataset_id: str,
+ validation_dataset_id: str,
algorithm: FinetuningAlgorithm,
algorithm_config: Union[
LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig
@@ -198,8 +198,8 @@ class PostTraining(Protocol):
self,
job_uuid: str,
finetuned_model: URL,
- dataset: TrainEvalDataset,
- validation_dataset: TrainEvalDataset,
+ dataset_id: str,
+ validation_dataset_id: str,
algorithm: RLHFAlgorithm,
algorithm_config: Union[DPOAlignmentConfig],
optimizer_config: OptimizerConfig,
diff --git a/llama_stack/apis/scoring/scoring.py b/llama_stack/apis/scoring/scoring.py
index adac34d55..1fd523dcb 100644
--- a/llama_stack/apis/scoring/scoring.py
+++ b/llama_stack/apis/scoring/scoring.py
@@ -37,7 +37,7 @@ class ScoreResponse(BaseModel):
class ScoringFunctionStore(Protocol):
- def get_scoring_function(self, name: str) -> ScoringFunctionDefWithProvider: ...
+ def get_scoring_function(self, name: str) -> ScoringFnDefWithProvider: ...
@runtime_checkable
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py
index a242215c6..fc3584f90 100644
--- a/llama_stack/apis/scoring_functions/scoring_functions.py
+++ b/llama_stack/apis/scoring_functions/scoring_functions.py
@@ -29,7 +29,7 @@ class LLMAsJudgeContext(BaseModel):
@json_schema_type
-class ScoringFunctionDef(BaseModel):
+class ScoringFnDef(BaseModel):
identifier: str
description: Optional[str] = None
metadata: Dict[str, Any] = Field(
@@ -48,7 +48,7 @@ class ScoringFunctionDef(BaseModel):
@json_schema_type
-class ScoringFunctionDefWithProvider(ScoringFunctionDef):
+class ScoringFnDefWithProvider(ScoringFnDef):
provider_id: str = Field(
description="ID of the provider which serves this dataset",
)
@@ -57,14 +57,14 @@ class ScoringFunctionDefWithProvider(ScoringFunctionDef):
@runtime_checkable
class ScoringFunctions(Protocol):
@webmethod(route="/scoring_functions/list", method="GET")
- async def list_scoring_functions(self) -> List[ScoringFunctionDefWithProvider]: ...
+ async def list_scoring_functions(self) -> List[ScoringFnDefWithProvider]: ...
@webmethod(route="/scoring_functions/get", method="GET")
async def get_scoring_function(
self, name: str
- ) -> Optional[ScoringFunctionDefWithProvider]: ...
+ ) -> Optional[ScoringFnDefWithProvider]: ...
@webmethod(route="/scoring_functions/register", method="POST")
async def register_scoring_function(
- self, function_def: ScoringFunctionDefWithProvider
+ self, function_def: ScoringFnDefWithProvider
) -> None: ...
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
index 60c756128..05b49036d 100644
--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -13,7 +13,6 @@ from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel
from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_stack.apis.reward_scoring import * # noqa: F403
class FilteringFunction(Enum):
@@ -40,7 +39,7 @@ class SyntheticDataGenerationRequest(BaseModel):
class SyntheticDataGenerationResponse(BaseModel):
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
- synthetic_data: List[ScoredDialogGenerations]
+ synthetic_data: List[Dict[str, Any]]
statistics: Optional[Dict[str, Any]] = None
diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py
index 318809baf..9ad82cd79 100644
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@@ -34,7 +34,7 @@ RoutableObject = Union[
ShieldDef,
MemoryBankDef,
DatasetDef,
- ScoringFunctionDef,
+ ScoringFnDef,
]
RoutableObjectWithProvider = Union[
@@ -42,7 +42,7 @@ RoutableObjectWithProvider = Union[
ShieldDefWithProvider,
MemoryBankDefWithProvider,
DatasetDefWithProvider,
- ScoringFunctionDefWithProvider,
+ ScoringFnDefWithProvider,
]
RoutedProtocol = Union[
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py
index b9b9fb229..cfe31a21d 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/distribution/resolver.py
@@ -14,6 +14,7 @@ from llama_stack.distribution.datatypes import * # noqa: F403
from llama_stack.apis.agents import Agents
from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
+from llama_stack.apis.eval import Eval
from llama_stack.apis.inference import Inference
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.memory import Memory
@@ -46,6 +47,7 @@ def api_protocol_map() -> Dict[Api, Any]:
Api.datasetio: DatasetIO,
Api.scoring_functions: ScoringFunctions,
Api.scoring: Scoring,
+ Api.eval: Eval,
}
diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py
index dcd588a9e..3e07b9162 100644
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@@ -100,7 +100,7 @@ class CommonRoutingTableImpl(RoutingTable):
scoring_functions = await p.list_scoring_functions()
add_objects(
[
- ScoringFunctionDefWithProvider(**s.dict(), provider_id=pid)
+ ScoringFnDefWithProvider(**s.dict(), provider_id=pid)
for s in scoring_functions
]
)
@@ -239,7 +239,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, Scoring):
- async def list_scoring_functions(self) -> List[ScoringFunctionDefWithProvider]:
+ async def list_scoring_functions(self) -> List[ScoringFnDefWithProvider]:
objects = []
for objs in self.registry.values():
objects.extend(objs)
@@ -247,10 +247,10 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, Scoring):
async def get_scoring_function(
self, name: str
- ) -> Optional[ScoringFunctionDefWithProvider]:
+ ) -> Optional[ScoringFnDefWithProvider]:
return self.get_object_by_identifier(name)
async def register_scoring_function(
- self, function_def: ScoringFunctionDefWithProvider
+ self, function_def: ScoringFnDefWithProvider
) -> None:
await self.register_object(function_def)
diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py
index 903ff5438..eace0ea1a 100644
--- a/llama_stack/providers/datatypes.py
+++ b/llama_stack/providers/datatypes.py
@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.datasets import DatasetDef
from llama_stack.apis.memory_banks import MemoryBankDef
from llama_stack.apis.models import ModelDef
-from llama_stack.apis.scoring_functions import ScoringFunctionDef
+from llama_stack.apis.scoring_functions import ScoringFnDef
from llama_stack.apis.shields import ShieldDef
@@ -25,6 +25,7 @@ class Api(Enum):
memory = "memory"
datasetio = "datasetio"
scoring = "scoring"
+ eval = "eval"
telemetry = "telemetry"
@@ -63,11 +64,9 @@ class DatasetsProtocolPrivate(Protocol):
class ScoringFunctionsProtocolPrivate(Protocol):
- async def list_scoring_functions(self) -> List[ScoringFunctionDef]: ...
+ async def list_scoring_functions(self) -> List[ScoringFnDef]: ...
- async def register_scoring_function(
- self, function_def: ScoringFunctionDef
- ) -> None: ...
+ async def register_scoring_function(self, function_def: ScoringFnDef) -> None: ...
@json_schema_type
diff --git a/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py b/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
index 43664f394..a96d9bcab 100644
--- a/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
+++ b/llama_stack/providers/impls/meta_reference/datasetio/datasetio.py
@@ -143,11 +143,12 @@ class MetaReferenceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
else:
next_page_token = int(page_token)
- if rows_in_page == -1:
- rows = dataset_info.dataset_impl[next_page_token:]
-
start = next_page_token
- end = min(start + rows_in_page, len(dataset_info.dataset_impl))
+ if rows_in_page == -1:
+ end = len(dataset_info.dataset_impl)
+ else:
+ end = min(start + rows_in_page, len(dataset_info.dataset_impl))
+
rows = dataset_info.dataset_impl[start:end]
return PaginatedRowsResult(
diff --git a/llama_stack/providers/impls/meta_reference/eval/__init__.py b/llama_stack/providers/impls/meta_reference/eval/__init__.py
new file mode 100644
index 000000000..fb285c668
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/__init__.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Dict
+
+from llama_stack.distribution.datatypes import Api, ProviderSpec
+
+from .config import MetaReferenceEvalConfig
+
+
+async def get_provider_impl(
+    config: MetaReferenceEvalConfig,
+    deps: Dict[Api, ProviderSpec],
+):
+    """Build and initialize the meta-reference eval provider.
+
+    The datasetio, datasets, scoring and inference entries of ``deps`` are
+    handed to the implementation, which is initialized before being returned.
+    """
+    # Imported lazily, inside the factory, as in the original code.
+    from .eval import MetaReferenceEvalImpl
+
+    eval_impl = MetaReferenceEvalImpl(
+        config=config,
+        datasetio_api=deps[Api.datasetio],
+        datasets_api=deps[Api.datasets],
+        scoring_api=deps[Api.scoring],
+        inference_api=deps[Api.inference],
+    )
+    await eval_impl.initialize()
+    return eval_impl
diff --git a/llama_stack/providers/impls/meta_reference/eval/config.py b/llama_stack/providers/impls/meta_reference/eval/config.py
new file mode 100644
index 000000000..1892da2a2
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/config.py
@@ -0,0 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from llama_stack.apis.eval import * # noqa: F401, F403
+
+
+# Configuration model for the meta-reference eval provider. It declares no
+# fields of its own; `BaseModel` is brought in by the star import above.
+class MetaReferenceEvalConfig(BaseModel): ...
diff --git a/llama_stack/providers/impls/meta_reference/eval/eval.py b/llama_stack/providers/impls/meta_reference/eval/eval.py
new file mode 100644
index 000000000..d675e40eb
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/eval/eval.py
@@ -0,0 +1,167 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import ast
+from enum import Enum
+
+from llama_models.llama3.api.datatypes import * # noqa: F403
+
+from llama_stack.apis.common.type_system import * # noqa: F403
+from llama_stack.apis.common.job_types import Job
+from llama_stack.apis.datasetio import DatasetIO
+from llama_stack.apis.datasets import Datasets
+from llama_stack.apis.eval import Eval, EvalCandidate, EvaluateResponse, JobStatus
+from llama_stack.apis.inference import Inference
+from llama_stack.apis.scoring import Scoring
+
+from .config import MetaReferenceEvalConfig
+
+
+# Names of the dataset columns the eval implementation reads and writes.
+class ColumnName(Enum):
+ # Required in both accepted input schemas.
+ expected_answer = "expected_answer"
+ # Input column for rows evaluated through chat completion.
+ chat_completion_input = "chat_completion_input"
+ # Input column for rows evaluated through plain completion.
+ completion_input = "completion_input"
+ # Key under which the model output is stored for each evaluated row.
+ generated_answer = "generated_answer"
+
+
+class MetaReferenceEvalImpl(Eval):
+    """Meta-reference implementation of the Eval API.
+
+    For every input row an answer is generated through the inference API and
+    the rows, merged with their generations, are scored through the scoring
+    API. Jobs run synchronously; their results are kept in ``self.jobs``.
+    """
+
+    def __init__(
+        self,
+        config: MetaReferenceEvalConfig,
+        datasetio_api: DatasetIO,
+        datasets_api: Datasets,
+        scoring_api: Scoring,
+        inference_api: Inference,
+    ) -> None:
+        self.config = config
+        self.datasetio_api = datasetio_api
+        self.datasets_api = datasets_api
+        self.scoring_api = scoring_api
+        self.inference_api = inference_api
+
+        # TODO: assume sync job, will need jobs API for async scheduling
+        # Maps job_id -> EvaluateResponse of the (already finished) job.
+        self.jobs = {}
+
+    async def initialize(self) -> None: ...
+
+    async def shutdown(self) -> None: ...
+
+    async def validate_eval_input_dataset_schema(self, dataset_id: str) -> None:
+        """Check that the dataset declares one of the accepted input schemas.
+
+        Raises:
+            ValueError: if the dataset cannot be found, has no schema, or its
+                schema is not exactly one of the expected schemas.
+        """
+        dataset_def = await self.datasets_api.get_dataset(dataset_identifier=dataset_id)
+        # Guard against a missing dataset so the caller gets a ValueError
+        # rather than an AttributeError on None.
+        if dataset_def is None:
+            raise ValueError(f"Dataset {dataset_id} is not registered.")
+        if not dataset_def.dataset_schema:
+            raise ValueError(f"Dataset {dataset_id} does not have a schema defined.")
+
+        expected_schemas = [
+            {
+                ColumnName.expected_answer.value: StringType(),
+                ColumnName.chat_completion_input.value: ChatCompletionInputType(),
+            },
+            {
+                ColumnName.expected_answer.value: StringType(),
+                ColumnName.completion_input.value: CompletionInputType(),
+            },
+        ]
+
+        if dataset_def.dataset_schema not in expected_schemas:
+            raise ValueError(
+                f"Dataset {dataset_id} does not have a correct input schema in {expected_schemas}"
+            )
+
+    async def evaluate_batch(
+        self,
+        dataset_id: str,
+        candidate: EvalCandidate,
+        scoring_functions: List[str],
+    ) -> Job:
+        """Evaluate every row of a dataset and store the result as a job.
+
+        The dataset schema is validated, all rows are fetched in one page
+        (``rows_in_page=-1``) and passed to ``evaluate``. The returned job id
+        is the number of jobs stored so far, as a string.
+        """
+        await self.validate_eval_input_dataset_schema(dataset_id=dataset_id)
+        all_rows = await self.datasetio_api.get_rows_paginated(
+            dataset_id=dataset_id,
+            rows_in_page=-1,
+        )
+        res = await self.evaluate(
+            input_rows=all_rows.rows,
+            candidate=candidate,
+            scoring_functions=scoring_functions,
+        )
+
+        # TODO: currently needs to wait for generation before returning
+        # need job scheduler queue (ray/celery) w/ jobs api
+        job_id = str(len(self.jobs))
+        self.jobs[job_id] = res
+        return Job(job_id=job_id)
+
+    async def evaluate(
+        self,
+        input_rows: List[Dict[str, Any]],
+        candidate: EvalCandidate,
+        scoring_functions: List[str],
+    ) -> EvaluateResponse:
+        """Generate an answer for each row and score the results.
+
+        Raises:
+            NotImplementedError: if ``candidate.type`` is ``"agent"``.
+            ValueError: if a row has neither a completion nor a chat
+                completion input column.
+        """
+        if candidate.type == "agent":
+            raise NotImplementedError(
+                "Evaluation with generation has not been implemented for agents"
+            )
+        assert (
+            candidate.sampling_params.max_tokens is not None
+        ), "SamplingParams.max_tokens must be provided"
+
+        generations = []
+        for row in input_rows:
+            # SECURITY: dataset cells are external data. They used to be
+            # passed to eval(), which executes arbitrary code; literal_eval
+            # only accepts Python literals (strings, lists, dicts, ...).
+            if ColumnName.completion_input.value in row:
+                input_content = ast.literal_eval(
+                    str(row[ColumnName.completion_input.value])
+                )
+                response = await self.inference_api.completion(
+                    model=candidate.model,
+                    content=input_content,
+                    sampling_params=candidate.sampling_params,
+                )
+            elif ColumnName.chat_completion_input.value in row:
+                raw_messages = ast.literal_eval(
+                    str(row[ColumnName.chat_completion_input.value])
+                )
+                input_messages = [UserMessage(**m) for m in raw_messages]
+                messages = []
+                # The optional system message goes first, then the row's messages.
+                if candidate.system_message:
+                    messages.append(candidate.system_message)
+                messages += input_messages
+                response = await self.inference_api.chat_completion(
+                    model=candidate.model,
+                    messages=messages,
+                    sampling_params=candidate.sampling_params,
+                )
+            else:
+                raise ValueError("Invalid input row")
+
+            generations.append(
+                {
+                    ColumnName.generated_answer.value: response.completion_message.content
+                }
+            )
+
+        # scoring with generated_answer; on a key clash the generated value wins
+        score_input_rows = [
+            input_r | generated_r
+            for input_r, generated_r in zip(input_rows, generations)
+        ]
+
+        score_response = await self.scoring_api.score(
+            input_rows=score_input_rows, scoring_functions=scoring_functions
+        )
+
+        return EvaluateResponse(generations=generations, scores=score_response.results)
+
+    async def job_status(self, job_id: str) -> Optional[JobStatus]:
+        """Return ``JobStatus.completed`` for a stored job, else ``None``."""
+        if job_id in self.jobs:
+            return JobStatus.completed
+
+        return None
+
+    async def job_cancel(self, job_id: str) -> None:
+        """Not supported; always raises ``NotImplementedError``."""
+        raise NotImplementedError("Job cancel is not implemented yet")
+
+    async def job_result(self, job_id: str) -> EvaluateResponse:
+        """Return the stored result of a completed job.
+
+        Raises:
+            ValueError: if the job is unknown or not completed.
+        """
+        status = await self.job_status(job_id)
+        # An unknown job has no status object; report it explicitly instead
+        # of failing with AttributeError on ``None.value``.
+        if status is None:
+            raise ValueError(f"Job {job_id} does not exist")
+        if status != JobStatus.completed:
+            raise ValueError(f"Job is not completed, Status: {status.value}")
+
+        return self.jobs[job_id]
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring.py b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
index 0d32c8195..b1d561533 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scoring.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring.py
@@ -13,17 +13,22 @@ from llama_stack.apis.datasetio import * # noqa: F403
from llama_stack.apis.datasets import * # noqa: F403
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scorer.equality_scorer import (
- EqualityScorer,
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
+ EqualityScoringFn,
+)
+
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
+ SubsetOfScoringFn,
)
from .config import MetaReferenceScoringConfig
-SUPPORTED_SCORERS = [
- EqualityScorer,
+SUPPORTED_SCORING_FNS = [
+ EqualityScoringFn,
+ SubsetOfScoringFn,
]
-SCORER_REGISTRY = {x.scoring_function_def.identifier: x for x in SUPPORTED_SCORERS}
+SCORER_REGISTRY = {x.scoring_function_def.identifier: x for x in SUPPORTED_SCORING_FNS}
class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
@@ -41,10 +46,10 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
async def shutdown(self) -> None: ...
- async def list_scoring_functions(self) -> List[ScoringFunctionDef]:
- return [x.scoring_function_def for x in SUPPORTED_SCORERS]
+ async def list_scoring_functions(self) -> List[ScoringFnDef]:
+ return [x.scoring_function_def for x in SUPPORTED_SCORING_FNS]
- async def register_scoring_function(self, function_def: ScoringFunctionDef) -> None:
+ async def register_scoring_function(self, function_def: ScoringFnDef) -> None:
raise NotImplementedError(
"Dynamically registering scoring functions is not supported"
)
@@ -96,9 +101,9 @@ class MetaReferenceScoringImpl(Scoring, ScoringFunctionsProtocolPrivate):
for scoring_fn_id in scoring_functions:
if scoring_fn_id not in SCORER_REGISTRY:
raise ValueError(f"Scoring function {scoring_fn_id} is not supported.")
- scorer = SCORER_REGISTRY[scoring_fn_id]()
- score_results = scorer.score(input_rows)
- agg_results = scorer.aggregate(score_results)
+ scoring_fn = SCORER_REGISTRY[scoring_fn_id]()
+ score_results = scoring_fn.score(input_rows)
+ agg_results = scoring_fn.aggregate(score_results)
res[scoring_fn_id] = ScoringResult(
score_rows=score_results,
aggregated_results=agg_results,
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/__init__.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/__init__.py
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
similarity index 81%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
index ea8a3f063..952d46bb2 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scorer/base_scorer.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/base_scoring_fn.py
@@ -9,15 +9,15 @@ from llama_stack.apis.scoring_functions import * # noqa: F401, F403
from llama_stack.apis.scoring import * # noqa: F401, F403
-class BaseScorer(ABC):
+class BaseScoringFn(ABC):
"""
- Base interface class for all meta-reference scorers.
- Each scorer needs to implement the following methods:
+ Base interface class for all meta-reference scoring_fns.
+ Each scoring_fn needs to implement the following methods:
- score_row(self, row)
- - aggregate(self, scorer_results)
+ - aggregate(self, scoring_fn_results)
"""
- scoring_function_def: ScoringFunctionDef
+ scoring_function_def: ScoringFnDef
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py
new file mode 100644
index 000000000..52eabea2e
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/common.py
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any, Dict, List
+
+from llama_stack.apis.scoring import ScoringResultRow
+
+
+def aggregate_accuracy(scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
+    """Aggregate per-row scores into accuracy statistics.
+
+    Args:
+        scoring_results: rows that each carry a numeric ``"score"`` entry.
+
+    Returns:
+        A dict with ``accuracy`` (sum of scores divided by row count),
+        ``num_correct`` (sum of scores) and ``num_total`` (row count).
+
+    Raises:
+        ValueError: if ``scoring_results`` is empty, since the accuracy is
+            undefined and would otherwise surface as a ZeroDivisionError.
+    """
+    if not scoring_results:
+        raise ValueError("Empty scoring results provided.")
+
+    num_total = len(scoring_results)
+    num_correct = sum(result["score"] for result in scoring_results)
+
+    return {
+        "accuracy": num_correct / num_total,
+        "num_correct": num_correct,
+        "num_total": num_total,
+    }
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
similarity index 65%
rename from llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
rename to llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
index ce765bfb5..cce0f948a 100644
--- a/llama_stack/providers/impls/meta_reference/scoring/scorer/equality_scorer.py
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/equality_scoring_fn.py
@@ -4,20 +4,23 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
-from llama_stack.providers.impls.meta_reference.scoring.scorer.base_scorer import (
- BaseScorer,
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+ BaseScoringFn,
)
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
from llama_stack.apis.scoring import * # noqa: F401, F403
from llama_stack.apis.common.type_system import * # noqa: F403
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+ aggregate_accuracy,
+)
-class EqualityScorer(BaseScorer):
+class EqualityScoringFn(BaseScoringFn):
"""
- A scorer that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
+ A scoring_fn that assigns a score of 1.0 if the input string matches the target string, and 0.0 otherwise.
"""
- scoring_function_def = ScoringFunctionDef(
+ scoring_function_def = ScoringFnDef(
identifier="equality",
description="Returns 1.0 if the input is equal to the target, 0.0 otherwise.",
parameters=[],
@@ -38,12 +41,4 @@ class EqualityScorer(BaseScorer):
}
def aggregate(self, scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
- assert len(scoring_results) > 0, "Empty scoring results provided."
- num_correct = sum(result["score"] for result in scoring_results)
- avg_score = num_correct / len(scoring_results)
-
- return {
- "accuracy": avg_score,
- "num_correct": num_correct,
- "num_total": len(scoring_results),
- }
+ return aggregate_accuracy(scoring_results)
diff --git a/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py
new file mode 100644
index 000000000..c7ee68e26
--- /dev/null
+++ b/llama_stack/providers/impls/meta_reference/scoring/scoring_fn/subset_of_scoring_fn.py
@@ -0,0 +1,44 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+ BaseScoringFn,
+)
+from llama_stack.apis.scoring_functions import * # noqa: F401, F403
+from llama_stack.apis.scoring import * # noqa: F401, F403
+from llama_stack.apis.common.type_system import * # noqa: F403
+from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+ aggregate_accuracy,
+)
+
+
+class SubsetOfScoringFn(BaseScoringFn):
+    """
+    Scoring function that gives a row 1.0 when its expected answer occurs
+    inside its generated answer, and 0.0 when it does not.
+    """
+
+    scoring_function_def = ScoringFnDef(
+        identifier="subset_of",
+        description="Returns 1.0 if the expected is included in generated, 0.0 otherwise.",
+        parameters=[],
+        return_type=NumberType(),
+    )
+
+    def score_row(self, input_row: Dict[str, Any]) -> ScoringResultRow:
+        """Score one row; both answer columns must be present."""
+        assert "expected_answer" in input_row, "Expected answer not found in input row."
+        assert (
+            "generated_answer" in input_row
+        ), "Generated answer not found in input row."
+
+        expected = input_row["expected_answer"]
+        generated = input_row["generated_answer"]
+        if expected in generated:
+            row_score = 1.0
+        else:
+            row_score = 0.0
+        return {"score": row_score}
+
+    def aggregate(self, scoring_results: List[ScoringResultRow]) -> Dict[str, Any]:
+        """Summarize the row scores with the shared accuracy aggregation."""
+        summary = aggregate_accuracy(scoring_results)
+        return summary
diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py
new file mode 100644
index 000000000..fc7c923d9
--- /dev/null
+++ b/llama_stack/providers/registry/eval.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import List
+
+from llama_stack.distribution.datatypes import * # noqa: F403
+
+
+def available_providers() -> List[ProviderSpec]:
+    """List the provider specs registered for the eval API.
+
+    Only the inline ``meta-reference`` provider is declared; it depends on
+    the datasetio, datasets, scoring and inference APIs.
+    """
+    required_apis = [
+        Api.datasetio,
+        Api.datasets,
+        Api.scoring,
+        Api.inference,
+    ]
+    meta_reference_eval = InlineProviderSpec(
+        api=Api.eval,
+        provider_type="meta-reference",
+        pip_packages=[],
+        module="llama_stack.providers.impls.meta_reference.eval",
+        config_class="llama_stack.providers.impls.meta_reference.eval.MetaReferenceEvalConfig",
+        api_dependencies=required_apis,
+    )
+    return [meta_reference_eval]
diff --git a/llama_stack/providers/tests/datasetio/test_dataset.csv b/llama_stack/providers/tests/datasetio/test_dataset.csv
index a1a250753..f682c6d3d 100644
--- a/llama_stack/providers/tests/datasetio/test_dataset.csv
+++ b/llama_stack/providers/tests/datasetio/test_dataset.csv
@@ -1,6 +1,6 @@
-input_query,generated_answer,expected_answer
-What is the capital of France?,London,Paris
-Who is the CEO of Meta?,Mark Zuckerberg,Mark Zuckerberg
-What is the largest planet in our solar system?,Jupiter,Jupiter
-What is the smallest country in the world?,China,Vatican City
-What is the currency of Japan?,Yen,Yen
+input_query,generated_answer,expected_answer,chat_completion_input
+What is the capital of France?,London,Paris,"[{'role': 'user', 'content': 'What is the capital of France?'}]"
+Who is the CEO of Meta?,Mark Zuckerberg,Mark Zuckerberg,"[{'role': 'user', 'content': 'Who is the CEO of Meta?'}]"
+What is the largest planet in our solar system?,Jupiter,Jupiter,"[{'role': 'user', 'content': 'What is the largest planet in our solar system?'}]"
+What is the smallest country in the world?,China,Vatican City,"[{'role': 'user', 'content': 'What is the smallest country in the world?'}]"
+What is the currency of Japan?,Yen,Yen,"[{'role': 'user', 'content': 'What is the currency of Japan?'}]"
diff --git a/llama_stack/providers/tests/datasetio/test_datasetio.py b/llama_stack/providers/tests/datasetio/test_datasetio.py
index 9a351ba30..9bd80f94d 100644
--- a/llama_stack/providers/tests/datasetio/test_datasetio.py
+++ b/llama_stack/providers/tests/datasetio/test_datasetio.py
@@ -61,20 +61,31 @@ def data_url_from_file(file_path: str) -> str:
return data_url
-async def register_dataset(datasets_impl: Datasets):
+async def register_dataset(
+ datasets_impl: Datasets, for_generation=False, dataset_id="test_dataset"
+):
test_file = Path(os.path.abspath(__file__)).parent / "test_dataset.csv"
test_url = data_url_from_file(str(test_file))
+
+ if for_generation:
+ dataset_schema = {
+ "expected_answer": StringType(),
+ "chat_completion_input": ChatCompletionInputType(),
+ }
+ else:
+ dataset_schema = {
+ "expected_answer": StringType(),
+ "input_query": StringType(),
+ "generated_answer": StringType(),
+ }
+
dataset = DatasetDefWithProvider(
- identifier="test_dataset",
+ identifier=dataset_id,
provider_id=os.environ["PROVIDER_ID"],
url=URL(
uri=test_url,
),
- dataset_schema={
- "generated_answer": StringType(),
- "expected_answer": StringType(),
- "input_query": StringType(),
- },
+ dataset_schema=dataset_schema,
)
await datasets_impl.register_dataset(dataset)
diff --git a/llama_stack/providers/tests/eval/__init__.py b/llama_stack/providers/tests/eval/__init__.py
new file mode 100644
index 000000000..756f351d8
--- /dev/null
+++ b/llama_stack/providers/tests/eval/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
diff --git a/llama_stack/providers/tests/eval/provider_config_example.yaml b/llama_stack/providers/tests/eval/provider_config_example.yaml
new file mode 100644
index 000000000..1576d2ef0
--- /dev/null
+++ b/llama_stack/providers/tests/eval/provider_config_example.yaml
@@ -0,0 +1,18 @@
+providers:
+ datasetio:
+ - provider_id: test-meta
+ provider_type: meta-reference
+ config: {}
+ scoring:
+ - provider_id: test-meta
+ provider_type: meta-reference
+ config: {}
+ eval:
+ - provider_id: test-meta
+ provider_type: meta-reference
+ config: {}
+ inference:
+ - provider_id: test-tgi
+ provider_type: remote::tgi
+ config:
+ url: http://127.0.0.1:5009
diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py
new file mode 100644
index 000000000..6b0d99a22
--- /dev/null
+++ b/llama_stack/providers/tests/eval/test_eval.py
@@ -0,0 +1,79 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import pytest
+import pytest_asyncio
+
+from llama_stack.apis.common.type_system import * # noqa: F403
+from llama_stack.apis.datasetio import * # noqa: F403
+from llama_stack.apis.eval.eval import ModelCandidate
+from llama_stack.distribution.datatypes import * # noqa: F403
+
+from llama_models.llama3.api import SamplingParams
+
+from llama_stack.providers.tests.datasetio.test_datasetio import register_dataset
+from llama_stack.providers.tests.resolver import resolve_impls_for_test
+
+# How to run this test:
+#
+# 1. Ensure you have a conda environment with the right dependencies installed. This is a bit tricky
+# since it depends on the provider you are testing. On top of that you need
+# `pytest` and `pytest-asyncio` installed.
+#
+# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing.
+#
+# 3. Run:
+#
+# ```bash
+# PROVIDER_ID=<your_provider> \
+# PROVIDER_CONFIG=provider_config.yaml \
+# pytest -s llama_stack/providers/tests/eval/test_eval.py \
+# --tb=short --disable-warnings
+# ```
+
+
+@pytest_asyncio.fixture(scope="session")
+async def eval_settings():
+    """Resolve the eval provider and hand the needed impls to the tests."""
+    resolved = await resolve_impls_for_test(
+        Api.eval, deps=[Api.datasetio, Api.scoring, Api.inference]
+    )
+    settings = {}
+    settings["eval_impl"] = resolved[Api.eval]
+    settings["scoring_impl"] = resolved[Api.scoring]
+    settings["datasets_impl"] = resolved[Api.datasets]
+    return settings
+
+
+@pytest.mark.asyncio
+async def test_eval(eval_settings):
+    """Run a batch eval over the generation dataset and check the job result."""
+    datasets_impl = eval_settings["datasets_impl"]
+    await register_dataset(
+        datasets_impl,
+        for_generation=True,
+        dataset_id="test_dataset_for_eval",
+    )
+
+    registered = await datasets_impl.list_datasets()
+    assert len(registered) == 1
+
+    eval_impl = eval_settings["eval_impl"]
+    candidate = ModelCandidate(
+        model="Llama3.2-1B-Instruct",
+        sampling_params=SamplingParams(),
+    )
+    job = await eval_impl.evaluate_batch(
+        dataset_id=registered[0].identifier,
+        candidate=candidate,
+        scoring_functions=["subset_of"],
+    )
+    # The first job stored by the provider gets id "0".
+    assert job.job_id == "0"
+
+    job_status = await eval_impl.job_status(job.job_id)
+    assert job_status and job_status.value == "completed"
+
+    eval_response = await eval_impl.job_result(job.job_id)
+    assert eval_response is not None
+    # One generation per row of the test dataset.
+    assert len(eval_response.generations) == 5
+    assert "subset_of" in eval_response.scores
diff --git a/tests/examples/evals-tgi-run.yaml b/tests/examples/evals-tgi-run.yaml
index e56c43420..e63523889 100644
--- a/tests/examples/evals-tgi-run.yaml
+++ b/tests/examples/evals-tgi-run.yaml
@@ -14,7 +14,12 @@ apis:
- datasets
- datasetio
- scoring
+- eval
providers:
+ eval:
+ - provider_id: meta0
+ provider_type: meta-reference
+ config: {}
scoring:
- provider_id: meta0
provider_type: meta-reference