mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
feat(api): (1/n) datasets api clean up (#1573)
## PR Stack - https://github.com/meta-llama/llama-stack/pull/1573 - https://github.com/meta-llama/llama-stack/pull/1625 - https://github.com/meta-llama/llama-stack/pull/1656 - https://github.com/meta-llama/llama-stack/pull/1657 - https://github.com/meta-llama/llama-stack/pull/1658 - https://github.com/meta-llama/llama-stack/pull/1659 - https://github.com/meta-llama/llama-stack/pull/1660 **Client SDK** - https://github.com/meta-llama/llama-stack-client-python/pull/203 **CI** -1391130488
<img width="1042" alt="image" src="https://github.com/user-attachments/assets/69636067-376d-436b-9204-896e2dd490ca" /> -- the test_rag_agent_with_attachments is flaky and not related to this PR ## Doc <img width="789" alt="image" src="https://github.com/user-attachments/assets/b88390f3-73d6-4483-b09a-a192064e32d9" /> ## Client Usage ```python client.datasets.register( source={ "type": "uri", "uri": "lsfs://mydata.jsonl", }, schema="jsonl_messages", # optional dataset_id="my_first_train_data" ) # quick prototype debugging client.datasets.register( data_reference={ "type": "rows", "rows": [ "messages": [...], ], }, schema="jsonl_messages", ) ``` ## Test Plan - CI:1387805545
``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py ``` ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/scoring/test_scoring.py ``` ``` pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ```
This commit is contained in:
parent
3b35a39b8b
commit
5287b437ae
29 changed files with 2593 additions and 2296 deletions
715
docs/_static/llama-stack-spec.html
vendored
715
docs/_static/llama-stack-spec.html
vendored
|
@ -40,75 +40,7 @@
|
|||
}
|
||||
],
|
||||
"paths": {
|
||||
"/v1/datasetio/rows": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/PaginatedRowsResult"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"DatasetIO"
|
||||
],
|
||||
"description": "Get a paginated list of rows from a dataset.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "query",
|
||||
"description": "The ID of the dataset to get the rows from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "rows_in_page",
|
||||
"in": "query",
|
||||
"description": "The number of rows to get per page.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "page_token",
|
||||
"in": "query",
|
||||
"description": "The token to get the next page of rows.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "filter_condition",
|
||||
"in": "query",
|
||||
"description": "(Optional) A condition to filter the rows by.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"/v1/datasetio/append-rows/{dataset_id}": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
@ -131,7 +63,16 @@
|
|||
"DatasetIO"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
|
@ -583,7 +524,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "List all buckets.",
|
||||
"parameters": [
|
||||
|
@ -623,7 +564,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "Create a new upload session for a file identified by a bucket and key.",
|
||||
"parameters": [],
|
||||
|
@ -850,7 +791,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "Get a file info identified by a bucket and key.",
|
||||
"parameters": [
|
||||
|
@ -900,7 +841,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "Delete a file identified by a bucket and key.",
|
||||
"parameters": [
|
||||
|
@ -1889,7 +1830,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "Returns information about an existsing upload session",
|
||||
"parameters": [
|
||||
|
@ -1937,7 +1878,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "Upload file content to an existing upload session. On the server, request body will have the raw bytes that are uploaded.",
|
||||
"parameters": [
|
||||
|
@ -2236,6 +2177,67 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/datasetio/iterrows/{dataset_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/IterrowsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"DatasetIO"
|
||||
],
|
||||
"description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "dataset_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the dataset to get the rows from.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "start_index",
|
||||
"in": "query",
|
||||
"description": "Index into dataset for the first row to get. Get all rows if None.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "The number of rows to get.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -2535,7 +2537,14 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Dataset"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
|
@ -2553,7 +2562,7 @@
|
|||
"tags": [
|
||||
"Datasets"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Register a new dataset.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -2594,7 +2603,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Files (Coming Soon)"
|
||||
"Files"
|
||||
],
|
||||
"description": "List all files in a bucket.",
|
||||
"parameters": [
|
||||
|
@ -3824,9 +3833,6 @@
|
|||
"AppendRowsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
@ -3858,7 +3864,6 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"rows"
|
||||
],
|
||||
"title": "AppendRowsRequest"
|
||||
|
@ -6824,6 +6829,224 @@
|
|||
],
|
||||
"title": "Benchmark"
|
||||
},
|
||||
"DataSource": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/URIDataSource"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"uri": "#/components/schemas/URIDataSource",
|
||||
"rows": "#/components/schemas/RowsDataSource"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Dataset": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"identifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_resource_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "dataset",
|
||||
"default": "dataset"
|
||||
},
|
||||
"purpose": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"post-training/messages",
|
||||
"eval/question-answer",
|
||||
"eval/messages-answer"
|
||||
],
|
||||
"title": "DatasetPurpose",
|
||||
"description": "Purpose of the dataset. Each purpose has a required input data schema."
|
||||
},
|
||||
"source": {
|
||||
"$ref": "#/components/schemas/DataSource"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"identifier",
|
||||
"provider_resource_id",
|
||||
"provider_id",
|
||||
"type",
|
||||
"purpose",
|
||||
"source",
|
||||
"metadata"
|
||||
],
|
||||
"title": "Dataset"
|
||||
},
|
||||
"RowsDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "rows",
|
||||
"default": "rows"
|
||||
},
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"rows"
|
||||
],
|
||||
"title": "RowsDataSource",
|
||||
"description": "A dataset stored in rows."
|
||||
},
|
||||
"URIDataSource": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "uri",
|
||||
"default": "uri"
|
||||
},
|
||||
"uri": {
|
||||
"type": "string",
|
||||
"description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\""
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"uri"
|
||||
],
|
||||
"title": "URIDataSource",
|
||||
"description": "A dataset that can be obtained from a URI."
|
||||
},
|
||||
"Model": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"identifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_resource_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "model",
|
||||
"default": "model"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"model_type": {
|
||||
"$ref": "#/components/schemas/ModelType",
|
||||
"default": "llm"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"identifier",
|
||||
"provider_resource_id",
|
||||
"provider_id",
|
||||
"type",
|
||||
"metadata",
|
||||
"model_type"
|
||||
],
|
||||
"title": "Model"
|
||||
},
|
||||
"ModelType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"llm",
|
||||
"embedding"
|
||||
],
|
||||
"title": "ModelType"
|
||||
},
|
||||
"AgentTurnInputType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -6899,70 +7122,6 @@
|
|||
],
|
||||
"title": "CompletionInputType"
|
||||
},
|
||||
"Dataset": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"identifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_resource_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "dataset",
|
||||
"default": "dataset"
|
||||
},
|
||||
"dataset_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ParamType"
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"$ref": "#/components/schemas/URL"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"identifier",
|
||||
"provider_resource_id",
|
||||
"provider_id",
|
||||
"type",
|
||||
"dataset_schema",
|
||||
"url",
|
||||
"metadata"
|
||||
],
|
||||
"title": "Dataset"
|
||||
},
|
||||
"JsonType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7057,151 +7216,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"StringType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "string",
|
||||
"default": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "StringType"
|
||||
},
|
||||
"UnionType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "union",
|
||||
"default": "union"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "UnionType"
|
||||
},
|
||||
"Model": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"identifier": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_resource_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "model",
|
||||
"default": "model"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"model_type": {
|
||||
"$ref": "#/components/schemas/ModelType",
|
||||
"default": "llm"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"identifier",
|
||||
"provider_resource_id",
|
||||
"provider_id",
|
||||
"type",
|
||||
"metadata",
|
||||
"model_type"
|
||||
],
|
||||
"title": "Model"
|
||||
},
|
||||
"ModelType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"llm",
|
||||
"embedding"
|
||||
],
|
||||
"title": "ModelType"
|
||||
},
|
||||
"PaginatedRowsResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows in the current page."
|
||||
},
|
||||
"total_count": {
|
||||
"type": "integer",
|
||||
"description": "The total number of rows in the dataset."
|
||||
},
|
||||
"next_page_token": {
|
||||
"type": "string",
|
||||
"description": "The token to get the next page of rows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"rows",
|
||||
"total_count"
|
||||
],
|
||||
"title": "PaginatedRowsResult",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"ScoringFn": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7265,6 +7279,36 @@
|
|||
],
|
||||
"title": "ScoringFn"
|
||||
},
|
||||
"StringType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "string",
|
||||
"default": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "StringType"
|
||||
},
|
||||
"UnionType": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "union",
|
||||
"default": "union"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "UnionType"
|
||||
},
|
||||
"Shield": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -8084,6 +8128,50 @@
|
|||
],
|
||||
"title": "ToolInvocationResult"
|
||||
},
|
||||
"IterrowsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows in the current page."
|
||||
},
|
||||
"next_start_index": {
|
||||
"type": "integer",
|
||||
"description": "Index into dataset for the first row in the next page. None if there are no more rows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "IterrowsResponse",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"ListAgentSessionsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -9330,23 +9418,18 @@
|
|||
"RegisterDatasetRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
"purpose": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"post-training/messages",
|
||||
"eval/question-answer",
|
||||
"eval/messages-answer"
|
||||
],
|
||||
"description": "The purpose of the dataset. One of - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }"
|
||||
},
|
||||
"dataset_schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ParamType"
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"$ref": "#/components/schemas/URL"
|
||||
},
|
||||
"provider_dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
"source": {
|
||||
"$ref": "#/components/schemas/DataSource",
|
||||
"description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -9371,14 +9454,18 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}"
|
||||
},
|
||||
"dataset_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset. If not provided, an ID will be generated."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"dataset_schema",
|
||||
"url"
|
||||
"purpose",
|
||||
"source"
|
||||
],
|
||||
"title": "RegisterDatasetRequest"
|
||||
},
|
||||
|
@ -10197,7 +10284,7 @@
|
|||
"x-displayName": "Llama Stack Evaluation API for running evaluations on model and agent candidates."
|
||||
},
|
||||
{
|
||||
"name": "Files (Coming Soon)"
|
||||
"name": "Files"
|
||||
},
|
||||
{
|
||||
"name": "Inference",
|
||||
|
@ -10258,7 +10345,7 @@
|
|||
"DatasetIO",
|
||||
"Datasets",
|
||||
"Eval",
|
||||
"Files (Coming Soon)",
|
||||
"Files",
|
||||
"Inference",
|
||||
"Inspect",
|
||||
"Models",
|
||||
|
|
499
docs/_static/llama-stack-spec.yaml
vendored
499
docs/_static/llama-stack-spec.yaml
vendored
|
@ -10,56 +10,7 @@ info:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
paths:
|
||||
/v1/datasetio/rows:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/PaginatedRowsResult'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: query
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: rows_in_page
|
||||
in: query
|
||||
description: The number of rows to get per page.
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
- name: page_token
|
||||
in: query
|
||||
description: The token to get the next page of rows.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: filter_condition
|
||||
in: query
|
||||
description: >-
|
||||
(Optional) A condition to filter the rows by.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
/v1/datasetio/append-rows/{dataset_id}:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
@ -77,7 +28,12 @@ paths:
|
|||
tags:
|
||||
- DatasetIO
|
||||
description: ''
|
||||
parameters: []
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
|
@ -394,7 +350,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: List all buckets.
|
||||
parameters:
|
||||
- name: bucket
|
||||
|
@ -421,7 +377,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Create a new upload session for a file identified by a bucket and key.
|
||||
parameters: []
|
||||
|
@ -580,7 +536,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Get a file info identified by a bucket and key.
|
||||
parameters:
|
||||
|
@ -616,7 +572,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Delete a file identified by a bucket and key.
|
||||
parameters:
|
||||
|
@ -1268,7 +1224,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Returns information about an existsing upload session
|
||||
parameters:
|
||||
|
@ -1299,7 +1255,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: >-
|
||||
Upload file content to an existing upload session. On the server, request
|
||||
body will have the raw bytes that are uploaded.
|
||||
|
@ -1501,6 +1457,50 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/InvokeToolRequest'
|
||||
required: true
|
||||
/v1/datasetio/iterrows/{dataset_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/IterrowsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- DatasetIO
|
||||
description: >-
|
||||
Get a paginated list of rows from a dataset. Uses cursor-based pagination.
|
||||
parameters:
|
||||
- name: dataset_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the dataset to get the rows from.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: start_index
|
||||
in: query
|
||||
description: >-
|
||||
Index into dataset for the first row to get. Get all rows if None.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
- name: limit
|
||||
in: query
|
||||
description: The number of rows to get.
|
||||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1710,6 +1710,10 @@ paths:
|
|||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Dataset'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1722,7 +1726,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Datasets
|
||||
description: ''
|
||||
description: Register a new dataset.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -1750,7 +1754,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
description: List all files in a bucket.
|
||||
parameters:
|
||||
- name: bucket
|
||||
|
@ -2607,8 +2611,6 @@ components:
|
|||
AppendRowsRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
|
@ -2623,7 +2625,6 @@ components:
|
|||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- rows
|
||||
title: AppendRowsRequest
|
||||
CompletionMessage:
|
||||
|
@ -4726,6 +4727,148 @@ components:
|
|||
- scoring_functions
|
||||
- metadata
|
||||
title: Benchmark
|
||||
DataSource:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/URIDataSource'
|
||||
- $ref: '#/components/schemas/RowsDataSource'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
uri: '#/components/schemas/URIDataSource'
|
||||
rows: '#/components/schemas/RowsDataSource'
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: dataset
|
||||
default: dataset
|
||||
purpose:
|
||||
type: string
|
||||
enum:
|
||||
- post-training/messages
|
||||
- eval/question-answer
|
||||
- eval/messages-answer
|
||||
title: DatasetPurpose
|
||||
description: >-
|
||||
Purpose of the dataset. Each purpose has a required input data schema.
|
||||
source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- purpose
|
||||
- source
|
||||
- metadata
|
||||
title: Dataset
|
||||
RowsDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: rows
|
||||
default: rows
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
|
||||
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
|
||||
world!"}]} ]
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- rows
|
||||
title: RowsDataSource
|
||||
description: A dataset stored in rows.
|
||||
URIDataSource:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: uri
|
||||
default: uri
|
||||
uri:
|
||||
type: string
|
||||
description: >-
|
||||
The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
|
||||
- "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- uri
|
||||
title: URIDataSource
|
||||
description: >-
|
||||
A dataset that can be obtained from a URI.
|
||||
Model:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
model_type:
|
||||
$ref: '#/components/schemas/ModelType'
|
||||
default: llm
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- metadata
|
||||
- model_type
|
||||
title: Model
|
||||
ModelType:
|
||||
type: string
|
||||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
title: ModelType
|
||||
AgentTurnInputType:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4781,45 +4924,6 @@ components:
|
|||
required:
|
||||
- type
|
||||
title: CompletionInputType
|
||||
Dataset:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: dataset
|
||||
default: dataset
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- dataset_schema
|
||||
- url
|
||||
- metadata
|
||||
title: Dataset
|
||||
JsonType:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -4878,97 +4982,6 @@ components:
|
|||
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
|
||||
completion_input: '#/components/schemas/CompletionInputType'
|
||||
agent_turn_input: '#/components/schemas/AgentTurnInputType'
|
||||
StringType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: string
|
||||
default: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: StringType
|
||||
UnionType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: union
|
||||
default: union
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: UnionType
|
||||
Model:
|
||||
type: object
|
||||
properties:
|
||||
identifier:
|
||||
type: string
|
||||
provider_resource_id:
|
||||
type: string
|
||||
provider_id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
model_type:
|
||||
$ref: '#/components/schemas/ModelType'
|
||||
default: llm
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
- provider_resource_id
|
||||
- provider_id
|
||||
- type
|
||||
- metadata
|
||||
- model_type
|
||||
title: Model
|
||||
ModelType:
|
||||
type: string
|
||||
enum:
|
||||
- llm
|
||||
- embedding
|
||||
title: ModelType
|
||||
PaginatedRowsResult:
|
||||
type: object
|
||||
properties:
|
||||
rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
total_count:
|
||||
type: integer
|
||||
description: The total number of rows in the dataset.
|
||||
next_page_token:
|
||||
type: string
|
||||
description: The token to get the next page of rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- rows
|
||||
- total_count
|
||||
title: PaginatedRowsResult
|
||||
description: A paginated list of rows from a dataset.
|
||||
ScoringFn:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5007,6 +5020,28 @@ components:
|
|||
- metadata
|
||||
- return_type
|
||||
title: ScoringFn
|
||||
StringType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: string
|
||||
default: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: StringType
|
||||
UnionType:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: union
|
||||
default: union
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: UnionType
|
||||
Shield:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5506,6 +5541,32 @@ components:
|
|||
required:
|
||||
- content
|
||||
title: ToolInvocationResult
|
||||
IterrowsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows in the current page.
|
||||
next_start_index:
|
||||
type: integer
|
||||
description: >-
|
||||
Index into dataset for the first row in the next page. None if there are
|
||||
no more rows.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: IterrowsResponse
|
||||
description: A paginated list of rows from a dataset.
|
||||
ListAgentSessionsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6313,18 +6374,35 @@ components:
|
|||
RegisterDatasetRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
dataset_schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ParamType'
|
||||
url:
|
||||
$ref: '#/components/schemas/URL'
|
||||
provider_dataset_id:
|
||||
type: string
|
||||
provider_id:
|
||||
purpose:
|
||||
type: string
|
||||
enum:
|
||||
- post-training/messages
|
||||
- eval/question-answer
|
||||
- eval/messages-answer
|
||||
description: >-
|
||||
The purpose of the dataset. One of - "post-training/messages": The dataset
|
||||
contains a messages column with list of messages for post-training. {
|
||||
"messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
|
||||
"content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
|
||||
contains a question column and an answer column for evaluation. { "question":
|
||||
"What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
|
||||
The dataset contains a messages column with list of messages and an answer
|
||||
column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
|
||||
my name is John Doe."}, {"role": "assistant", "content": "Hello, John
|
||||
Doe. How can I help you today?"}, {"role": "user", "content": "What's
|
||||
my name?"}, ], "answer": "John Doe" }
|
||||
source:
|
||||
$ref: '#/components/schemas/DataSource'
|
||||
description: >-
|
||||
The data source of the dataset. Ensure that the data source schema is
|
||||
compatible with the purpose of the dataset. Examples: - { "type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
|
||||
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
|
||||
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
|
||||
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
|
||||
} ] }
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -6335,11 +6413,16 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The metadata for the dataset. - E.g. {"description": "My dataset"}
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset. If not provided, an ID will be generated.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- dataset_schema
|
||||
- url
|
||||
- purpose
|
||||
- source
|
||||
title: RegisterDatasetRequest
|
||||
RegisterModelRequest:
|
||||
type: object
|
||||
|
@ -6855,7 +6938,7 @@ tags:
|
|||
- name: Eval
|
||||
x-displayName: >-
|
||||
Llama Stack Evaluation API for running evaluations on model and agent candidates.
|
||||
- name: Files (Coming Soon)
|
||||
- name: Files
|
||||
- name: Inference
|
||||
description: >-
|
||||
This API provides the raw interface to the underlying models. Two kinds of models
|
||||
|
@ -6893,7 +6976,7 @@ x-tagGroups:
|
|||
- DatasetIO
|
||||
- Datasets
|
||||
- Eval
|
||||
- Files (Coming Soon)
|
||||
- Files
|
||||
- Inference
|
||||
- Inspect
|
||||
- Models
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -84,16 +84,14 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Not in Google Colab environment\n",
|
||||
"\u001b[33mWarning: `bwrap` is not available. Code interpreter tool will not work correctly.\u001b[0m\n"
|
||||
"Not in Google Colab environment\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/opt/anaconda3/envs/master/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||
" from .autonotebook import tqdm as notebook_tqdm\n"
|
||||
"Warning: `bwrap` is not available. Code interpreter tool will not work correctly.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -117,76 +115,146 @@
|
|||
"- datasetio\n",
|
||||
"- eval\n",
|
||||
"- inference\n",
|
||||
"- memory\n",
|
||||
"- safety\n",
|
||||
"- scoring\n",
|
||||
"- telemetry\n",
|
||||
"- tool_runtime\n",
|
||||
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"container_image: null\n",
|
||||
"- vector_io\n",
|
||||
"benchmarks: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"container_image: null\n",
|
||||
"datasets: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"image_name: together\n",
|
||||
"memory_banks: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"logging: null\n",
|
||||
"metadata_store:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">registry.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
"models:\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-8B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-70B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-FP8\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.1</span>-405B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-3B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-11B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.2</span>-90B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3.3</span>-70B-Instruct-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Meta-Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-8B\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision-Turbo\n",
|
||||
"- metadata: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-Guard-<span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span>-11B-Vision-Turbo\n",
|
||||
"- metadata:\n",
|
||||
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8192</span>\n",
|
||||
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">768</span>\n",
|
||||
" model_id: togethercomputer/m2-bert-80M-8k-retrieval\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - embedding\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval\n",
|
||||
"- metadata:\n",
|
||||
" context_length: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">32768</span>\n",
|
||||
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">768</span>\n",
|
||||
" model_id: togethercomputer/m2-bert-80M-32k-retrieval\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - embedding\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval\n",
|
||||
"- metadata:\n",
|
||||
" embedding_dimension: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">384</span>\n",
|
||||
" model_id: all-MiniLM-L6-v2\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
|
@ -203,14 +271,26 @@
|
|||
" provider_id: meta-reference\n",
|
||||
" provider_type: inline::meta-reference\n",
|
||||
" datasetio:\n",
|
||||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">huggingface_datasetio.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: huggingface\n",
|
||||
" provider_type: remote::huggingface\n",
|
||||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">localfs_datasetio.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: localfs\n",
|
||||
" provider_type: inline::localfs\n",
|
||||
" eval:\n",
|
||||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">meta_reference_eval.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: meta-reference\n",
|
||||
" provider_type: inline::meta-reference\n",
|
||||
" inference:\n",
|
||||
|
@ -222,16 +302,9 @@
|
|||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" provider_id: sentence-transformers\n",
|
||||
" provider_type: inline::sentence-transformers\n",
|
||||
" memory:\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: faiss\n",
|
||||
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::fa</span>iss\n",
|
||||
" safety:\n",
|
||||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" - config:\n",
|
||||
" excluded_categories: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
" provider_id: llama-guard\n",
|
||||
" provider_type: inline::llama-guard\n",
|
||||
" scoring:\n",
|
||||
|
@ -269,7 +342,26 @@
|
|||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" provider_id: rag-runtime\n",
|
||||
" provider_type: inline::rag-runtime\n",
|
||||
" - config: <span style=\"font-weight: bold\">{}</span>\n",
|
||||
" provider_id: model-context-protocol\n",
|
||||
" provider_type: remote::model-context-protocol\n",
|
||||
" - config:\n",
|
||||
" api_key: <span style=\"color: #008000; text-decoration-color: #008000\">'********'</span>\n",
|
||||
" provider_id: wolfram-alpha\n",
|
||||
" provider_type: remote::wolfram-alpha\n",
|
||||
" vector_io:\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: <span style=\"color: #800080; text-decoration-color: #800080\">/Users/xiyan/.llama/distributions/together/</span><span style=\"color: #ff00ff; text-decoration-color: #ff00ff\">faiss_store.db</span>\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: faiss\n",
|
||||
" provider_type: inlin<span style=\"color: #00ff00; text-decoration-color: #00ff00; font-weight: bold\">e::fa</span>iss\n",
|
||||
"scoring_fns: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"server:\n",
|
||||
" port: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8321</span>\n",
|
||||
" tls_certfile: null\n",
|
||||
" tls_keyfile: null\n",
|
||||
"shields:\n",
|
||||
"- params: null\n",
|
||||
" provider_id: null\n",
|
||||
|
@ -288,6 +380,11 @@
|
|||
" mcp_endpoint: null\n",
|
||||
" provider_id: code-interpreter\n",
|
||||
" toolgroup_id: builtin::code_interpreter\n",
|
||||
"- args: null\n",
|
||||
" mcp_endpoint: null\n",
|
||||
" provider_id: wolfram-alpha\n",
|
||||
" toolgroup_id: builtin::wolfram_alpha\n",
|
||||
"vector_dbs: <span style=\"font-weight: bold\">[]</span>\n",
|
||||
"version: <span style=\"color: #008000; text-decoration-color: #008000\">'2'</span>\n",
|
||||
"\n",
|
||||
"</pre>\n"
|
||||
|
@ -298,76 +395,146 @@
|
|||
"- datasetio\n",
|
||||
"- eval\n",
|
||||
"- inference\n",
|
||||
"- memory\n",
|
||||
"- safety\n",
|
||||
"- scoring\n",
|
||||
"- telemetry\n",
|
||||
"- tool_runtime\n",
|
||||
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"container_image: null\n",
|
||||
"- vector_io\n",
|
||||
"benchmarks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"container_image: null\n",
|
||||
"datasets: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"image_name: together\n",
|
||||
"memory_banks: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"logging: null\n",
|
||||
"metadata_store:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mregistry.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
"models:\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-8B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-70B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-FP8\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-\u001b[1;36m3.1\u001b[0m-405B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-3B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-11B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.2\u001b[0m-90B-Vision-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-\u001b[1;36m3.3\u001b[0m-70B-Instruct-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Meta-Llama-Guard-\u001b[1;36m3\u001b[0m-8B\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
|
||||
"- metadata: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - llm\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: meta-llama/Llama-Guard-\u001b[1;36m3\u001b[0m-11B-Vision-Turbo\n",
|
||||
"- metadata:\n",
|
||||
" context_length: \u001b[1;36m8192\u001b[0m\n",
|
||||
" embedding_dimension: \u001b[1;36m768\u001b[0m\n",
|
||||
" model_id: togethercomputer/m2-bert-80M-8k-retrieval\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - embedding\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval\n",
|
||||
"- metadata:\n",
|
||||
" context_length: \u001b[1;36m32768\u001b[0m\n",
|
||||
" embedding_dimension: \u001b[1;36m768\u001b[0m\n",
|
||||
" model_id: togethercomputer/m2-bert-80M-32k-retrieval\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
" - embedding\n",
|
||||
" provider_id: together\n",
|
||||
" provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval\n",
|
||||
"- metadata:\n",
|
||||
" embedding_dimension: \u001b[1;36m384\u001b[0m\n",
|
||||
" model_id: all-MiniLM-L6-v2\n",
|
||||
" model_type: !!python/object/apply:llama_stack.apis.models.models.ModelType\n",
|
||||
|
@ -384,14 +551,26 @@
|
|||
" provider_id: meta-reference\n",
|
||||
" provider_type: inline::meta-reference\n",
|
||||
" datasetio:\n",
|
||||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mhuggingface_datasetio.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: huggingface\n",
|
||||
" provider_type: remote::huggingface\n",
|
||||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mlocalfs_datasetio.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: localfs\n",
|
||||
" provider_type: inline::localfs\n",
|
||||
" eval:\n",
|
||||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mmeta_reference_eval.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: meta-reference\n",
|
||||
" provider_type: inline::meta-reference\n",
|
||||
" inference:\n",
|
||||
|
@ -403,16 +582,9 @@
|
|||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" provider_id: sentence-transformers\n",
|
||||
" provider_type: inline::sentence-transformers\n",
|
||||
" memory:\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: faiss\n",
|
||||
" provider_type: inlin\u001b[1;92me::fa\u001b[0miss\n",
|
||||
" safety:\n",
|
||||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" - config:\n",
|
||||
" excluded_categories: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
" provider_id: llama-guard\n",
|
||||
" provider_type: inline::llama-guard\n",
|
||||
" scoring:\n",
|
||||
|
@ -450,7 +622,26 @@
|
|||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" provider_id: rag-runtime\n",
|
||||
" provider_type: inline::rag-runtime\n",
|
||||
" - config: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
" provider_id: model-context-protocol\n",
|
||||
" provider_type: remote::model-context-protocol\n",
|
||||
" - config:\n",
|
||||
" api_key: \u001b[32m'********'\u001b[0m\n",
|
||||
" provider_id: wolfram-alpha\n",
|
||||
" provider_type: remote::wolfram-alpha\n",
|
||||
" vector_io:\n",
|
||||
" - config:\n",
|
||||
" kvstore:\n",
|
||||
" db_path: \u001b[35m/Users/xiyan/.llama/distributions/together/\u001b[0m\u001b[95mfaiss_store.db\u001b[0m\n",
|
||||
" namespace: null\n",
|
||||
" type: sqlite\n",
|
||||
" provider_id: faiss\n",
|
||||
" provider_type: inlin\u001b[1;92me::fa\u001b[0miss\n",
|
||||
"scoring_fns: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"server:\n",
|
||||
" port: \u001b[1;36m8321\u001b[0m\n",
|
||||
" tls_certfile: null\n",
|
||||
" tls_keyfile: null\n",
|
||||
"shields:\n",
|
||||
"- params: null\n",
|
||||
" provider_id: null\n",
|
||||
|
@ -469,6 +660,11 @@
|
|||
" mcp_endpoint: null\n",
|
||||
" provider_id: code-interpreter\n",
|
||||
" toolgroup_id: builtin::code_interpreter\n",
|
||||
"- args: null\n",
|
||||
" mcp_endpoint: null\n",
|
||||
" provider_id: wolfram-alpha\n",
|
||||
" toolgroup_id: builtin::wolfram_alpha\n",
|
||||
"vector_dbs: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n",
|
||||
"version: \u001b[32m'2'\u001b[0m\n",
|
||||
"\n"
|
||||
]
|
||||
|
@ -532,7 +728,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
|
@ -643,17 +839,7 @@
|
|||
"id": "DJkmoG2kq1_P",
|
||||
"outputId": "8493ee59-c6ff-4bb6-d787-f295944db1cf"
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generating dev split: 100%|██████████| 5/5 [00:00<00:00, 139.81 examples/s]\n",
|
||||
"Generating validation split: 100%|██████████| 30/30 [00:00<00:00, 258.29 examples/s]\n",
|
||||
"Generating test split: 100%|██████████| 287/287 [00:01<00:00, 197.69 examples/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import datasets\n",
|
||||
"\n",
|
||||
|
@ -676,7 +862,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
|
@ -691,7 +877,7 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 5/5 [00:42<00:00, 8.60s/it]\n"
|
||||
"100%|██████████| 5/5 [00:33<00:00, 6.71s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -699,16 +885,18 @@
|
|||
"text/html": [
|
||||
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">EvaluateResponse</span><span style=\"font-weight: bold\">(</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">generations</span>=<span style=\"font-weight: bold\">[</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B) Two pathogens'</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'**Potato Pests**\\n\\nThe two insects depicted are:\\n\\n* **Colorado Potato Beetle (Leptinotarsa decemlineata)**: Characterized by black and yellow stripes, this beetle is a significant pest of potatoes. It feeds on the leaves and can cause substantial damage to the crop.\\n* **False Potato Beetle (Leptinotarsa juncta)**: Also known as the false Colorado beetle, this species has similar coloring but is not as harmful to potatoes as the Colorado potato beetle.'</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA) Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB) Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC) Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD) Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE) Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"The image shows a sunflower leaf with a powdery mildew, which is a fungal disease caused by various species of fungi. The white powdery coating on the leaves is a characteristic symptom of this disease. The leaf also has some black spots, which could be indicative of a secondary infection or another type of disease. However, without more information or a closer examination, it's difficult to determine the exact cause of the black spots.\\n\\nBased on the image alone, we can see at least two types of symptoms: the powdery mildew and the black spots. This suggests that there may be more than one pathogen involved, but it's also possible that the black spots are a result of the same fungal infection causing the powdery mildew.\\n\\nAnswer: B) Two pathogens\"</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'The symptoms observed, characterized by the massive gum production on the trunks of the grapefruit trees in Cyprus, suggest a physiological or pathological response. Given the absence of visible signs of damage or pests from a higher point on a hillside, and considering the specific nature of the symptom (gum production), we can infer that the cause is more likely related to an internal process within the tree rather than external damage from harvesting. While physiological stress (B) could lead to such symptoms, the primary reason for gum production in trees, especially in citrus species, is typically linked to disease. Among the options provided, fungal gummosis (E) is a condition known to cause gumming in citrus trees, which aligns with the observed symptoms. Therefore, without direct evidence of external damage (harvesting) or confirmation of physiological stress being the primary cause, the most appropriate answer based on the information given is:\\n\\nAnswer: E'</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'Answer: D'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A) Physiological problems'</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"**Analysis of the Image**\\n\\nThe image provided shows a rhubarb plant with split petioles. To determine the cause of this issue, we need to consider various factors that could lead to such damage.\\n\\n**Possible Causes of Petiole Splitting**\\n\\n* **Physiological Problems**: Rhubarb plants can experience physiological stress due to environmental factors like extreme temperatures, waterlogging, or nutrient deficiencies. This stress can cause the petioles to split.\\n* **Phytoplasma Infection**: Phytoplasma is a type of bacteria that can infect plants, including rhubarb. It can cause symptoms such as yellowing leaves, stunted growth, and splitting of petioles.\\n* **Animal Damage**: Animals like rabbits, deer, or insects can damage rhubarb plants by eating the leaves or stems, which can lead to splitting of the petioles.\\n* **Bacteria**: Bacterial infections can also cause damage to rhubarb plants, including splitting of the petioles.\\n\\n**Conclusion**\\n\\nBased on the analysis, it is clear that all the options listed (A) Physiological problems, B) Phytoplasma infection, D) Animal damage, and E) Bacteria) could potentially cause the petioles of the rhubarb plant to split. Therefore, there is no single option that would not be a cause for the petioles splitting.\\n\\n**Answer**: C) I don't know and don't want to guess.\"</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
|
||||
|
@ -723,16 +911,18 @@
|
|||
"text/plain": [
|
||||
"\u001b[1;35mEvaluateResponse\u001b[0m\u001b[1m(\u001b[0m\n",
|
||||
"\u001b[2;32m│ \u001b[0m\u001b[33mgenerations\u001b[0m=\u001b[1m[\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The image shows a sunflower leaf with small, dark spots and white powdery patches. The dark spots are likely caused by a fungal pathogen, such as rust or septoria leaf spot, while the white powdery patches are likely caused by a fungal pathogen, such as powdery mildew.\\n\\nSince there are two distinct types of lesions on the leaf, it is likely that there are two different pathogens infecting the leaf.\\n\\n**Answer:** B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens'\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'**Potato Pests**\\n\\nThe two insects depicted are:\\n\\n* **Colorado Potato Beetle \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLeptinotarsa decemlineata\u001b[0m\u001b[32m)\u001b[0m\u001b[32m**: Characterized by black and yellow stripes, this beetle is a significant pest of potatoes. It feeds on the leaves and can cause substantial damage to the crop.\\n* **False Potato Beetle \u001b[0m\u001b[32m(\u001b[0m\u001b[32mLeptinotarsa juncta\u001b[0m\u001b[32m)\u001b[0m\u001b[32m**: Also known as the false Colorado beetle, this species has similar coloring but is not as harmful to potatoes as the Colorado potato beetle.'\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The question requires the identification of the reason behind the massive gum production on the trunks of grapefruit trees in Cyprus, despite appearing healthy from a distance. The correct answer can be deduced by analyzing the symptoms and considering the possible causes.\\n\\nTo determine the correct answer, let's evaluate each option:\\n\\nA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Don't know or not sure: This option is incorrect because it does not provide a specific reason for the gum production.\\n\\nB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological stress: This option is also incorrect because it is too broad and does not specifically explain the gum production.\\n\\nC\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Bacterial disease: This option is incorrect because bacterial diseases typically cause different symptoms such as leaf spots, blights, or wilting.\\n\\nD\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Harvesting damage when cutting with knives: This option is incorrect because harvesting damage would likely cause wounds or scars on the tree, but it would not lead to massive gum production.\\n\\nE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Fungal gummosis: This option is the most likely cause of the gum production. Fungal gummosis is a common disease in citrus trees, including grapefruit, that causes the production of gum or sap on the trunks and branches. The disease is typically caused by fungi such as Phytophthora or Diplodia, which infect the tree through wounds or natural openings. The gum production is a defense mechanism by the tree to try to seal off the infection and prevent further damage.\\n\\nTherefore, the correct answer is:\\n\\nAnswer: E\"\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"The image shows a sunflower leaf with a powdery mildew, which is a fungal disease caused by various species of fungi. The white powdery coating on the leaves is a characteristic symptom of this disease. The leaf also has some black spots, which could be indicative of a secondary infection or another type of disease. However, without more information or a closer examination, it's difficult to determine the exact cause of the black spots.\\n\\nBased on the image alone, we can see at least two types of symptoms: the powdery mildew and the black spots. This suggests that there may be more than one pathogen involved, but it's also possible that the black spots are a result of the same fungal infection causing the powdery mildew.\\n\\nAnswer: B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Two pathogens\"\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'The symptoms observed, characterized by the massive gum production on the trunks of the grapefruit trees in Cyprus, suggest a physiological or pathological response. Given the absence of visible signs of damage or pests from a higher point on a hillside, and considering the specific nature of the symptom \u001b[0m\u001b[32m(\u001b[0m\u001b[32mgum production\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, we can infer that the cause is more likely related to an internal process within the tree rather than external damage from harvesting. While physiological stress \u001b[0m\u001b[32m(\u001b[0m\u001b[32mB\u001b[0m\u001b[32m)\u001b[0m\u001b[32m could lead to such symptoms, the primary reason for gum production in trees, especially in citrus species, is typically linked to disease. Among the options provided, fungal gummosis \u001b[0m\u001b[32m(\u001b[0m\u001b[32mE\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a condition known to cause gumming in citrus trees, which aligns with the observed symptoms. Therefore, without direct evidence of external damage \u001b[0m\u001b[32m(\u001b[0m\u001b[32mharvesting\u001b[0m\u001b[32m)\u001b[0m\u001b[32m or confirmation of physiological stress being the primary cause, the most appropriate answer based on the information given is:\\n\\nAnswer: E'\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'Answer: D'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'**Causes of Splitting Petioles in Rhubarb**\\n\\nThe following factors can cause the petioles of rhubarb to split:\\n\\n* **Physiological Problems**: Issues such as water stress, nutrient deficiencies, or extreme temperatures can lead to splitting.\\n* **Phytoplasma Infection**: A bacterial infection caused by phytoplasma can lead to splitting of the petioles.\\n* **Animal Damage**: Pests like slugs, snails, or rodents can damage the plant and cause splitting.\\n* **Bacterial Infection**: Bacterial infections can also cause splitting.\\n\\nAs a result, the correct answer is:\\n\\n*Answer*: A\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological problems'\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"**Analysis of the Image**\\n\\nThe image provided shows a rhubarb plant with split petioles. To determine the cause of this issue, we need to consider various factors that could lead to such damage.\\n\\n**Possible Causes of Petiole Splitting**\\n\\n* **Physiological Problems**: Rhubarb plants can experience physiological stress due to environmental factors like extreme temperatures, waterlogging, or nutrient deficiencies. This stress can cause the petioles to split.\\n* **Phytoplasma Infection**: Phytoplasma is a type of bacteria that can infect plants, including rhubarb. It can cause symptoms such as yellowing leaves, stunted growth, and splitting of petioles.\\n* **Animal Damage**: Animals like rabbits, deer, or insects can damage rhubarb plants by eating the leaves or stems, which can lead to splitting of the petioles.\\n* **Bacteria**: Bacterial infections can also cause damage to rhubarb plants, including splitting of the petioles.\\n\\n**Conclusion**\\n\\nBased on the analysis, it is clear that all the options listed \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Physiological problems, B\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Phytoplasma infection, D\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Animal damage, and E\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Bacteria\u001b[0m\u001b[32m)\u001b[0m\u001b[32m could potentially cause the petioles of the rhubarb plant to split. Therefore, there is no single option that would not be a cause for the petioles splitting.\\n\\n**Answer**: C\u001b[0m\u001b[32m)\u001b[0m\u001b[32m I don't know and don't want to guess.\"\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
|
||||
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
|
||||
|
@ -815,7 +1005,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"id": "HXmZf3Ymw-aX"
|
||||
},
|
||||
|
@ -823,39 +1013,33 @@
|
|||
"source": [
|
||||
"simpleqa_dataset_id = \"huggingface::simpleqa\"\n",
|
||||
"\n",
|
||||
"_ = client.datasets.register(\n",
|
||||
"register_dataset_response = client.datasets.register(\n",
|
||||
" purpose=\"eval/messages-answer\",\n",
|
||||
" source={\n",
|
||||
" \"type\": \"uri\",\n",
|
||||
" \"uri\": \"huggingface://datasets/llamastack/simpleqa?split=train\",\n",
|
||||
" },\n",
|
||||
" dataset_id=simpleqa_dataset_id,\n",
|
||||
" provider_id=\"huggingface\",\n",
|
||||
" url={\"uri\": \"https://huggingface.co/datasets/llamastack/simpleqa\"},\n",
|
||||
" metadata={\n",
|
||||
" \"path\": \"llamastack/simpleqa\",\n",
|
||||
" \"split\": \"train\",\n",
|
||||
" },\n",
|
||||
" dataset_schema={\n",
|
||||
" \"input_query\": {\"type\": \"string\"},\n",
|
||||
" \"expected_answer\": {\"type\": \"string\"},\n",
|
||||
" \"chat_completion_input\": {\"type\": \"chat_completion_input\"},\n",
|
||||
" },\n",
|
||||
")\n"
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"id": "Gc8azb4Rxr5J"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"eval_rows = client.datasetio.get_rows_paginated(\n",
|
||||
"eval_rows = client.datasets.iterrows(\n",
|
||||
" dataset_id=simpleqa_dataset_id,\n",
|
||||
" rows_in_page=5,\n",
|
||||
")\n"
|
||||
" limit=5,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/",
|
||||
|
@ -876,7 +1060,7 @@
|
|||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 5/5 [00:31<00:00, 6.38s/it]\n"
|
||||
"100%|██████████| 5/5 [00:13<00:00, 2.71s/it]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -889,14 +1073,14 @@
|
|||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'I do not have information on the Leipzig 1877 tournament.'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'I am unable to verify in whose honor the Leipzig 1877 tournament was organized.'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'generated_answer'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"font-weight: bold\">}</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"font-weight: bold\">]</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ </span><span style=\"color: #808000; text-decoration-color: #808000\">scores</span>=<span style=\"font-weight: bold\">{</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ </span><span style=\"color: #008000; text-decoration-color: #008000\">'llm-as-judge::405b-simpleqa'</span>: <span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\">ScoringResult</span><span style=\"font-weight: bold\">(</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">aggregated_results</span>=<span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'categorical_count'</span>: <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'categorical_count'</span>: <span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'A'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>: <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span><span style=\"font-weight: bold\">}}}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ </span><span style=\"color: #808000; text-decoration-color: #808000\">score_rows</span>=<span style=\"font-weight: bold\">[</span>\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
"<span style=\"color: #7fbf7f; text-decoration-color: #7fbf7f\">│ │ │ │ </span><span style=\"font-weight: bold\">{</span><span style=\"color: #008000; text-decoration-color: #008000\">'score'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span>, <span style=\"color: #008000; text-decoration-color: #008000\">'judge_feedback'</span>: <span style=\"color: #008000; text-decoration-color: #008000\">'C'</span><span style=\"font-weight: bold\">}</span>,\n",
|
||||
|
@ -917,14 +1101,14 @@
|
|||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"Radcliffe College was a women's liberal arts college in Cambridge, Massachusetts. However, it merged with Harvard University in 1977 and is now known as the Radcliffe Institute for Advanced Study at Harvard University.\"\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'I do not have information on the Leipzig 1877 tournament.'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m'I am unable to verify in whose honor the Leipzig 1877 tournament was organized.'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'generated_answer'\u001b[0m: \u001b[32m\"I am unable to verify what Empress Elizabeth of Austria's favorite sculpture depicted at her villa Achilleion at Corfu, according to Karl Küchler.\"\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n",
|
||||
"\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n",
|
||||
"\u001b[2;32m│ \u001b[0m\u001b[33mscores\u001b[0m=\u001b[1m{\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'llm-as-judge::405b-simpleqa'\u001b[0m: \u001b[1;35mScoringResult\u001b[0m\u001b[1m(\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33maggregated_results\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'categorical_count'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'categorical_count'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'A'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'C'\u001b[0m: \u001b[1;36m4\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore_rows\u001b[0m=\u001b[1m[\u001b[0m\n",
|
||||
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
"\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'score'\u001b[0m: \u001b[32m'C'\u001b[0m, \u001b[32m'judge_feedback'\u001b[0m: \u001b[32m'C'\u001b[0m\u001b[1m}\u001b[0m,\n",
|
||||
|
@ -957,7 +1141,7 @@
|
|||
"\n",
|
||||
"response = client.eval.evaluate_rows_alpha(\n",
|
||||
" benchmark_id=\"meta-reference::simpleqa\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" input_rows=eval_rows.data,\n",
|
||||
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
|
@ -1106,7 +1290,7 @@
|
|||
"\n",
|
||||
"response = client.eval.evaluate_rows_alpha(\n",
|
||||
" benchmark_id=\"meta-reference::simpleqa\",\n",
|
||||
" input_rows=eval_rows.rows,\n",
|
||||
" input_rows=eval_rows.data,\n",
|
||||
" scoring_functions=[\"llm-as-judge::405b-simpleqa\"],\n",
|
||||
" benchmark_config={\n",
|
||||
" \"type\": \"benchmark\",\n",
|
||||
|
|
|
@ -457,9 +457,9 @@ class Generator:
|
|||
"status": 400,
|
||||
"title": "Bad Request",
|
||||
"detail": "The request was invalid or malformed",
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
self.responses["TooManyRequests429"] = Response(
|
||||
|
@ -471,9 +471,9 @@ class Generator:
|
|||
"status": 429,
|
||||
"title": "Too Many Requests",
|
||||
"detail": "You have exceeded the rate limit. Please try again later.",
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
self.responses["InternalServerError500"] = Response(
|
||||
|
@ -485,9 +485,9 @@ class Generator:
|
|||
"status": 500,
|
||||
"title": "Internal Server Error",
|
||||
"detail": "An unexpected error occurred. Our team has been notified.",
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
# Add a default error response for any unhandled error cases
|
||||
|
@ -500,9 +500,9 @@ class Generator:
|
|||
"status": 0,
|
||||
"title": "Error",
|
||||
"detail": "An unexpected error occurred",
|
||||
}
|
||||
},
|
||||
)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
|
||||
|
@ -547,11 +547,14 @@ class Generator:
|
|||
"SyntheticDataGeneration",
|
||||
"PostTraining",
|
||||
"BatchInference",
|
||||
"Files",
|
||||
]:
|
||||
op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
|
||||
print(op.defining_class.__name__)
|
||||
|
||||
# TODO (xiyan): temporary fix for datasetio inner impl + datasets api
|
||||
# if op.defining_class.__name__ in ["DatasetIO"]:
|
||||
# op.defining_class.__name__ = "Datasets"
|
||||
|
||||
doc_string = parse_type(op.func_ref)
|
||||
doc_params = dict(
|
||||
(param.name, param.description) for param in doc_string.params.values()
|
||||
|
@ -598,7 +601,9 @@ class Generator:
|
|||
|
||||
# data passed in request body as raw bytes cannot have request parameters
|
||||
if raw_bytes_request_body and op.request_params:
|
||||
raise ValueError("Cannot have both raw bytes request body and request parameters")
|
||||
raise ValueError(
|
||||
"Cannot have both raw bytes request body and request parameters"
|
||||
)
|
||||
|
||||
# data passed in request body as raw bytes
|
||||
if raw_bytes_request_body:
|
||||
|
|
|
@ -114,23 +114,17 @@ pprint(response)
|
|||
simpleqa_dataset_id = "huggingface::simpleqa"
|
||||
|
||||
_ = client.datasets.register(
|
||||
purpose="eval/messages-answer",
|
||||
source={
|
||||
"type": "uri",
|
||||
"uri": "huggingface://datasets/llamastack/simpleqa?split=train",
|
||||
},
|
||||
dataset_id=simpleqa_dataset_id,
|
||||
provider_id="huggingface",
|
||||
url={"uri": "https://huggingface.co/datasets/llamastack/simpleqa"},
|
||||
metadata={
|
||||
"path": "llamastack/simpleqa",
|
||||
"split": "train",
|
||||
},
|
||||
dataset_schema={
|
||||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "chat_completion_input"},
|
||||
},
|
||||
)
|
||||
|
||||
eval_rows = client.datasetio.get_rows_paginated(
|
||||
eval_rows = client.datasets.iterrows(
|
||||
dataset_id=simpleqa_dataset_id,
|
||||
rows_in_page=5,
|
||||
limit=5,
|
||||
)
|
||||
```
|
||||
|
||||
|
@ -143,7 +137,7 @@ client.benchmarks.register(
|
|||
|
||||
response = client.eval.evaluate_rows(
|
||||
benchmark_id="meta-reference::simpleqa",
|
||||
input_rows=eval_rows.rows,
|
||||
input_rows=eval_rows.data,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
benchmark_config={
|
||||
"eval_candidate": {
|
||||
|
@ -191,7 +185,7 @@ agent_config = {
|
|||
|
||||
response = client.eval.evaluate_rows(
|
||||
benchmark_id="meta-reference::simpleqa",
|
||||
input_rows=eval_rows.rows,
|
||||
input_rows=eval_rows.data,
|
||||
scoring_functions=["llm-as-judge::405b-simpleqa"],
|
||||
benchmark_config={
|
||||
"eval_candidate": {
|
||||
|
|
|
@ -13,19 +13,16 @@ from llama_stack.schema_utils import json_schema_type, webmethod
|
|||
|
||||
|
||||
@json_schema_type
|
||||
class PaginatedRowsResult(BaseModel):
|
||||
class IterrowsResponse(BaseModel):
|
||||
"""
|
||||
A paginated list of rows from a dataset.
|
||||
|
||||
:param rows: The rows in the current page.
|
||||
:param total_count: The total number of rows in the dataset.
|
||||
:param next_page_token: The token to get the next page of rows.
|
||||
:param data: The rows in the current page.
|
||||
:param next_start_index: Index into dataset for the first row in the next page. None if there are no more rows.
|
||||
"""
|
||||
|
||||
# the rows obey the DatasetSchema for the given dataset
|
||||
rows: List[Dict[str, Any]]
|
||||
total_count: int
|
||||
next_page_token: Optional[str] = None
|
||||
data: List[Dict[str, Any]]
|
||||
next_start_index: Optional[int] = None
|
||||
|
||||
|
||||
class DatasetStore(Protocol):
|
||||
|
@ -37,22 +34,21 @@ class DatasetIO(Protocol):
|
|||
# keeping for aligning with inference/safety, but this is not used
|
||||
dataset_store: DatasetStore
|
||||
|
||||
@webmethod(route="/datasetio/rows", method="GET")
|
||||
async def get_rows_paginated(
|
||||
# TODO(xiyan): there's a flakiness here where setting route to "/datasets/" here will not result in proper routing
|
||||
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
rows_in_page: int,
|
||||
page_token: Optional[str] = None,
|
||||
filter_condition: Optional[str] = None,
|
||||
) -> PaginatedRowsResult:
|
||||
"""Get a paginated list of rows from a dataset.
|
||||
start_index: Optional[int] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> IterrowsResponse:
|
||||
"""Get a paginated list of rows from a dataset. Uses cursor-based pagination.
|
||||
|
||||
:param dataset_id: The ID of the dataset to get the rows from.
|
||||
:param rows_in_page: The number of rows to get per page.
|
||||
:param page_token: The token to get the next page of rows.
|
||||
:param filter_condition: (Optional) A condition to filter the rows by.
|
||||
:param start_index: Index into dataset for the first row to get. Get all rows if None.
|
||||
:param limit: The number of rows to get.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasetio/rows", method="POST")
|
||||
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
|
||||
|
|
|
@ -4,19 +4,102 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any, Dict, List, Literal, Optional, Protocol
|
||||
from enum import Enum
|
||||
from typing import Annotated, Any, Dict, List, Literal, Optional, Protocol, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
|
||||
|
||||
|
||||
class DatasetPurpose(str, Enum):
|
||||
"""
|
||||
Purpose of the dataset. Each purpose has a required input data schema.
|
||||
|
||||
:cvar post-training/messages: The dataset contains messages used for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
:cvar eval/question-answer: The dataset contains a question column and an answer column.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
:cvar eval/messages-answer: The dataset contains a messages column with list of messages and an answer column.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
"""
|
||||
|
||||
post_training_messages = "post-training/messages"
|
||||
eval_question_answer = "eval/question-answer"
|
||||
eval_messages_answer = "eval/messages-answer"
|
||||
|
||||
# TODO: add more schemas here
|
||||
|
||||
|
||||
class DatasetType(Enum):
|
||||
"""
|
||||
Type of the dataset source.
|
||||
:cvar uri: The dataset can be obtained from a URI.
|
||||
:cvar rows: The dataset is stored in rows.
|
||||
"""
|
||||
|
||||
uri = "uri"
|
||||
rows = "rows"
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class URIDataSource(BaseModel):
|
||||
"""A dataset that can be obtained from a URI.
|
||||
:param uri: The dataset can be obtained from a URI. E.g.
|
||||
- "https://mywebsite.com/mydata.jsonl"
|
||||
- "lsfs://mydata.jsonl"
|
||||
- "data:csv;base64,{base64_content}"
|
||||
"""
|
||||
|
||||
type: Literal["uri"] = "uri"
|
||||
uri: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RowsDataSource(BaseModel):
|
||||
"""A dataset stored in rows.
|
||||
:param rows: The dataset is stored in rows. E.g.
|
||||
- [
|
||||
{"messages": [{"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}]}
|
||||
]
|
||||
"""
|
||||
|
||||
type: Literal["rows"] = "rows"
|
||||
rows: List[Dict[str, Any]]
|
||||
|
||||
|
||||
DataSource = register_schema(
|
||||
Annotated[
|
||||
Union[URIDataSource, RowsDataSource],
|
||||
Field(discriminator="type"),
|
||||
],
|
||||
name="DataSource",
|
||||
)
|
||||
|
||||
|
||||
class CommonDatasetFields(BaseModel):
|
||||
dataset_schema: Dict[str, ParamType]
|
||||
url: URL
|
||||
"""
|
||||
Common fields for a dataset.
|
||||
"""
|
||||
|
||||
purpose: DatasetPurpose
|
||||
source: DataSource
|
||||
metadata: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Any additional metadata for this dataset",
|
||||
|
@ -50,13 +133,69 @@ class Datasets(Protocol):
|
|||
@webmethod(route="/datasets", method="POST")
|
||||
async def register_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
dataset_schema: Dict[str, ParamType],
|
||||
url: URL,
|
||||
provider_dataset_id: Optional[str] = None,
|
||||
provider_id: Optional[str] = None,
|
||||
purpose: DatasetPurpose,
|
||||
source: DataSource,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None: ...
|
||||
dataset_id: Optional[str] = None,
|
||||
) -> Dataset:
|
||||
"""
|
||||
Register a new dataset.
|
||||
|
||||
:param purpose: The purpose of the dataset. One of
|
||||
- "post-training/messages": The dataset contains a messages column with list of messages for post-training.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
- "eval/question-answer": The dataset contains a question column and an answer column for evaluation.
|
||||
{
|
||||
"question": "What is the capital of France?",
|
||||
"answer": "Paris"
|
||||
}
|
||||
- "eval/messages-answer": The dataset contains a messages column with list of messages and an answer column for evaluation.
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, my name is John Doe."},
|
||||
{"role": "assistant", "content": "Hello, John Doe. How can I help you today?"},
|
||||
{"role": "user", "content": "What's my name?"},
|
||||
],
|
||||
"answer": "John Doe"
|
||||
}
|
||||
:param source: The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples:
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "https://mywebsite.com/mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "lsfs://mydata.jsonl"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "data:csv;base64,{base64_content}"
|
||||
}
|
||||
- {
|
||||
"type": "uri",
|
||||
"uri": "huggingface://llamastack/simpleqa?split=train"
|
||||
}
|
||||
- {
|
||||
"type": "rows",
|
||||
"rows": [
|
||||
{
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello, world!"},
|
||||
{"role": "assistant", "content": "Hello, world!"},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
:param metadata: The metadata for the dataset.
|
||||
- E.g. {"description": "My dataset"}
|
||||
:param dataset_id: The ID of the dataset. If not provided, an ID will be generated.
|
||||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/datasets/{dataset_id:path}", method="GET")
|
||||
async def get_dataset(
|
||||
|
|
|
@ -12,7 +12,8 @@ from llama_stack.apis.common.content_types import (
|
|||
InterleavedContent,
|
||||
InterleavedContentItem,
|
||||
)
|
||||
from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
|
||||
from llama_stack.apis.datasetio import DatasetIO, IterrowsResponse
|
||||
from llama_stack.apis.datasets import DatasetPurpose, DataSource
|
||||
from llama_stack.apis.eval import (
|
||||
BenchmarkConfig,
|
||||
Eval,
|
||||
|
@ -160,7 +161,11 @@ class InferenceRouter(Inference):
|
|||
await self.routing_table.register_model(model_id, provider_model_id, provider_id, metadata, model_type)
|
||||
|
||||
def _construct_metrics(
|
||||
self, prompt_tokens: int, completion_tokens: int, total_tokens: int, model: Model
|
||||
self,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
total_tokens: int,
|
||||
model: Model,
|
||||
) -> List[MetricEvent]:
|
||||
"""Constructs a list of MetricEvent objects containing token usage metrics.
|
||||
|
||||
|
@ -298,7 +303,12 @@ class InferenceRouter(Inference):
|
|||
completion_text += chunk.event.delta.text
|
||||
if chunk.event.event_type == ChatCompletionResponseEventType.complete:
|
||||
completion_tokens = await self._count_tokens(
|
||||
[CompletionMessage(content=completion_text, stop_reason=StopReason.end_of_turn)],
|
||||
[
|
||||
CompletionMessage(
|
||||
content=completion_text,
|
||||
stop_reason=StopReason.end_of_turn,
|
||||
)
|
||||
],
|
||||
tool_config.tool_prompt_format,
|
||||
)
|
||||
total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
|
||||
|
@ -471,21 +481,36 @@ class DatasetIORouter(DatasetIO):
|
|||
logger.debug("DatasetIORouter.shutdown")
|
||||
pass
|
||||
|
||||
async def get_rows_paginated(
|
||||
async def register_dataset(
|
||||
self,
|
||||
purpose: DatasetPurpose,
|
||||
source: DataSource,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
dataset_id: Optional[str] = None,
|
||||
) -> None:
|
||||
logger.debug(
|
||||
f"DatasetIORouter.register_dataset: {purpose=} {source=} {metadata=} {dataset_id=}",
|
||||
)
|
||||
await self.routing_table.register_dataset(
|
||||
purpose=purpose,
|
||||
source=source,
|
||||
metadata=metadata,
|
||||
dataset_id=dataset_id,
|
||||
)
|
||||
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
rows_in_page: int,
|
||||
page_token: Optional[str] = None,
|
||||
filter_condition: Optional[str] = None,
|
||||
) -> PaginatedRowsResult:
|
||||
start_index: Optional[int] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> IterrowsResponse:
|
||||
logger.debug(
|
||||
f"DatasetIORouter.get_rows_paginated: {dataset_id}, rows_in_page={rows_in_page}",
|
||||
f"DatasetIORouter.iterrows: {dataset_id}, {start_index=} {limit=}",
|
||||
)
|
||||
return await self.routing_table.get_provider_impl(dataset_id).get_rows_paginated(
|
||||
return await self.routing_table.get_provider_impl(dataset_id).iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=rows_in_page,
|
||||
page_token=page_token,
|
||||
filter_condition=filter_condition,
|
||||
start_index=start_index,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pydantic import TypeAdapter
|
||||
|
@ -12,7 +13,14 @@ from pydantic import TypeAdapter
|
|||
from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.common.type_system import ParamType
|
||||
from llama_stack.apis.datasets import Dataset, Datasets, ListDatasetsResponse
|
||||
from llama_stack.apis.datasets import (
|
||||
Dataset,
|
||||
DatasetPurpose,
|
||||
Datasets,
|
||||
DatasetType,
|
||||
DataSource,
|
||||
ListDatasetsResponse,
|
||||
)
|
||||
from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType
|
||||
from llama_stack.apis.resource import ResourceType
|
||||
from llama_stack.apis.scoring_functions import (
|
||||
|
@ -352,34 +360,42 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
|
|||
|
||||
async def register_dataset(
|
||||
self,
|
||||
dataset_id: str,
|
||||
dataset_schema: Dict[str, ParamType],
|
||||
url: URL,
|
||||
provider_dataset_id: Optional[str] = None,
|
||||
provider_id: Optional[str] = None,
|
||||
purpose: DatasetPurpose,
|
||||
source: DataSource,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
if provider_dataset_id is None:
|
||||
dataset_id: Optional[str] = None,
|
||||
) -> Dataset:
|
||||
if not dataset_id:
|
||||
dataset_id = f"dataset-{str(uuid.uuid4())}"
|
||||
|
||||
provider_dataset_id = dataset_id
|
||||
if provider_id is None:
|
||||
# If provider_id not specified, use the only provider if it supports this dataset
|
||||
if len(self.impls_by_provider_id) == 1:
|
||||
provider_id = list(self.impls_by_provider_id.keys())[0]
|
||||
|
||||
# infer provider from source
|
||||
if source.type == DatasetType.rows.value:
|
||||
provider_id = "localfs"
|
||||
elif source.type == DatasetType.uri.value:
|
||||
# infer provider from uri
|
||||
if source.uri.startswith("huggingface"):
|
||||
provider_id = "huggingface"
|
||||
else:
|
||||
raise ValueError(
|
||||
f"No provider specified and multiple providers available. Please specify a provider_id. Available providers: {self.impls_by_provider_id.keys()}"
|
||||
)
|
||||
provider_id = "localfs"
|
||||
else:
|
||||
raise ValueError(f"Unknown data source type: {source.type}")
|
||||
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
|
||||
dataset = Dataset(
|
||||
identifier=dataset_id,
|
||||
provider_resource_id=provider_dataset_id,
|
||||
provider_id=provider_id,
|
||||
dataset_schema=dataset_schema,
|
||||
url=url,
|
||||
purpose=purpose,
|
||||
source=source,
|
||||
metadata=metadata,
|
||||
)
|
||||
|
||||
await self.register_object(dataset)
|
||||
return dataset
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None:
|
||||
dataset = await self.get_dataset(dataset_id)
|
||||
|
|
|
@ -166,11 +166,10 @@ def run_evaluation_3():
|
|||
eval_candidate = st.session_state["eval_candidate"]
|
||||
|
||||
dataset_id = benchmarks[selected_benchmark].dataset_id
|
||||
rows = llama_stack_api.client.datasetio.get_rows_paginated(
|
||||
rows = llama_stack_api.client.datasets.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
)
|
||||
total_rows = len(rows.rows)
|
||||
total_rows = len(rows.data)
|
||||
# Add number of examples control
|
||||
num_rows = st.number_input(
|
||||
"Number of Examples to Evaluate",
|
||||
|
@ -195,7 +194,7 @@ def run_evaluation_3():
|
|||
if st.button("Run Evaluation"):
|
||||
progress_text = "Running evaluation..."
|
||||
progress_bar = st.progress(0, text=progress_text)
|
||||
rows = rows.rows
|
||||
rows = rows.data
|
||||
if num_rows < total_rows:
|
||||
rows = rows[:num_rows]
|
||||
|
||||
|
|
|
@ -3,20 +3,14 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import base64
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import pandas
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
|
||||
from llama_stack.apis.datasetio import DatasetIO, IterrowsResponse
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
|
||||
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
|
||||
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
|
||||
from .config import LocalFSDatasetIOConfig
|
||||
|
@ -24,30 +18,7 @@ from .config import LocalFSDatasetIOConfig
|
|||
DATASETS_PREFIX = "localfs_datasets:"
|
||||
|
||||
|
||||
class BaseDataset(ABC):
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def __len__(self) -> int:
|
||||
raise NotImplementedError()
|
||||
|
||||
@abstractmethod
|
||||
def __getitem__(self, idx):
|
||||
raise NotImplementedError()
|
||||
|
||||
@abstractmethod
|
||||
def load(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@dataclass
|
||||
class DatasetInfo:
|
||||
dataset_def: Dataset
|
||||
dataset_impl: BaseDataset
|
||||
|
||||
|
||||
class PandasDataframeDataset(BaseDataset):
|
||||
class PandasDataframeDataset:
|
||||
def __init__(self, dataset_def: Dataset, *args, **kwargs) -> None:
|
||||
super().__init__(*args, **kwargs)
|
||||
self.dataset_def = dataset_def
|
||||
|
@ -64,23 +35,19 @@ class PandasDataframeDataset(BaseDataset):
|
|||
else:
|
||||
return self.df.iloc[idx].to_dict()
|
||||
|
||||
def _validate_dataset_schema(self, df) -> pandas.DataFrame:
|
||||
# note that we will drop any columns in dataset that are not in the schema
|
||||
df = df[self.dataset_def.dataset_schema.keys()]
|
||||
# check all columns in dataset schema are present
|
||||
assert len(df.columns) == len(self.dataset_def.dataset_schema)
|
||||
# TODO: type checking against column types in dataset schema
|
||||
return df
|
||||
|
||||
def load(self) -> None:
|
||||
if self.df is not None:
|
||||
return
|
||||
|
||||
df = get_dataframe_from_url(self.dataset_def.url)
|
||||
if df is None:
|
||||
raise ValueError(f"Failed to load dataset from {self.dataset_def.url}")
|
||||
if self.dataset_def.source.type == "uri":
|
||||
self.df = get_dataframe_from_uri(self.dataset_def.source.uri)
|
||||
elif self.dataset_def.source.type == "rows":
|
||||
self.df = pandas.DataFrame(self.dataset_def.source.rows)
|
||||
else:
|
||||
raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}")
|
||||
|
||||
self.df = self._validate_dataset_schema(df)
|
||||
if self.df is None:
|
||||
raise ValueError(f"Failed to load dataset from {self.dataset_def.url}")
|
||||
|
||||
|
||||
class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
||||
|
@ -99,95 +66,55 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|||
|
||||
for dataset in stored_datasets:
|
||||
dataset = Dataset.model_validate_json(dataset)
|
||||
dataset_impl = PandasDataframeDataset(dataset)
|
||||
self.dataset_infos[dataset.identifier] = DatasetInfo(
|
||||
dataset_def=dataset,
|
||||
dataset_impl=dataset_impl,
|
||||
)
|
||||
self.dataset_infos[dataset.identifier] = dataset
|
||||
|
||||
async def shutdown(self) -> None: ...
|
||||
|
||||
async def register_dataset(
|
||||
self,
|
||||
dataset: Dataset,
|
||||
dataset_def: Dataset,
|
||||
) -> None:
|
||||
# Store in kvstore
|
||||
key = f"{DATASETS_PREFIX}{dataset.identifier}"
|
||||
key = f"{DATASETS_PREFIX}{dataset_def.identifier}"
|
||||
await self.kvstore.set(
|
||||
key=key,
|
||||
value=dataset.json(),
|
||||
)
|
||||
dataset_impl = PandasDataframeDataset(dataset)
|
||||
self.dataset_infos[dataset.identifier] = DatasetInfo(
|
||||
dataset_def=dataset,
|
||||
dataset_impl=dataset_impl,
|
||||
value=dataset_def.model_dump_json(),
|
||||
)
|
||||
self.dataset_infos[dataset_def.identifier] = dataset_def
|
||||
|
||||
async def unregister_dataset(self, dataset_id: str) -> None:
|
||||
key = f"{DATASETS_PREFIX}{dataset_id}"
|
||||
await self.kvstore.delete(key=key)
|
||||
del self.dataset_infos[dataset_id]
|
||||
|
||||
async def get_rows_paginated(
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
rows_in_page: int,
|
||||
page_token: Optional[str] = None,
|
||||
filter_condition: Optional[str] = None,
|
||||
) -> PaginatedRowsResult:
|
||||
dataset_info = self.dataset_infos.get(dataset_id)
|
||||
dataset_info.dataset_impl.load()
|
||||
start_index: Optional[int] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> IterrowsResponse:
|
||||
dataset_def = self.dataset_infos[dataset_id]
|
||||
dataset_impl = PandasDataframeDataset(dataset_def)
|
||||
dataset_impl.load()
|
||||
|
||||
if page_token and not page_token.isnumeric():
|
||||
raise ValueError("Invalid page_token")
|
||||
start_index = start_index or 0
|
||||
|
||||
if page_token is None or len(page_token) == 0:
|
||||
next_page_token = 0
|
||||
if limit is None or limit == -1:
|
||||
end = len(dataset_impl)
|
||||
else:
|
||||
next_page_token = int(page_token)
|
||||
end = min(start_index + limit, len(dataset_impl))
|
||||
|
||||
start = next_page_token
|
||||
if rows_in_page == -1:
|
||||
end = len(dataset_info.dataset_impl)
|
||||
else:
|
||||
end = min(start + rows_in_page, len(dataset_info.dataset_impl))
|
||||
rows = dataset_impl[start_index:end]
|
||||
|
||||
rows = dataset_info.dataset_impl[start:end]
|
||||
|
||||
return PaginatedRowsResult(
|
||||
rows=rows,
|
||||
total_count=len(rows),
|
||||
next_page_token=str(end),
|
||||
return IterrowsResponse(
|
||||
data=rows,
|
||||
next_start_index=end if end < len(dataset_impl) else None,
|
||||
)
|
||||
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
||||
dataset_info = self.dataset_infos.get(dataset_id)
|
||||
if dataset_info is None:
|
||||
raise ValueError(f"Dataset with id {dataset_id} not found")
|
||||
|
||||
dataset_impl = dataset_info.dataset_impl
|
||||
dataset_def = self.dataset_infos[dataset_id]
|
||||
dataset_impl = PandasDataframeDataset(dataset_def)
|
||||
dataset_impl.load()
|
||||
|
||||
new_rows_df = pandas.DataFrame(rows)
|
||||
new_rows_df = dataset_impl._validate_dataset_schema(new_rows_df)
|
||||
dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True)
|
||||
|
||||
url = str(dataset_info.dataset_def.url.uri)
|
||||
parsed_url = urlparse(url)
|
||||
|
||||
if parsed_url.scheme == "file" or not parsed_url.scheme:
|
||||
file_path = parsed_url.path
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
dataset_impl.df.to_csv(file_path, index=False)
|
||||
elif parsed_url.scheme == "data":
|
||||
# For data URLs, we need to update the base64-encoded content
|
||||
if not parsed_url.path.startswith("text/csv;base64,"):
|
||||
raise ValueError("Data URL must be a base64-encoded CSV")
|
||||
|
||||
csv_buffer = dataset_impl.df.to_csv(index=False)
|
||||
base64_content = base64.b64encode(csv_buffer.encode("utf-8")).decode("utf-8")
|
||||
dataset_info.dataset_def.url = URL(uri=f"data:text/csv;base64,{base64_content}")
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported URL scheme: {parsed_url.scheme}. Only file:// and data: URLs are supported for writing."
|
||||
)
|
||||
|
|
|
@ -14,16 +14,11 @@ from llama_stack.apis.datasetio import DatasetIO
|
|||
from llama_stack.apis.datasets import Datasets
|
||||
from llama_stack.apis.inference import Inference, SystemMessage, UserMessage
|
||||
from llama_stack.apis.scoring import Scoring
|
||||
from llama_stack.distribution.datatypes import Api
|
||||
from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
|
||||
from llama_stack.providers.inline.agents.meta_reference.agent_instance import (
|
||||
MEMORY_QUERY_TOOL,
|
||||
)
|
||||
from llama_stack.providers.utils.common.data_schema_validator import (
|
||||
ColumnName,
|
||||
get_valid_schemas,
|
||||
validate_dataset_schema,
|
||||
)
|
||||
from llama_stack.providers.utils.common.data_schema_validator import ColumnName
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
|
||||
from .....apis.common.job_types import Job
|
||||
|
@ -88,15 +83,17 @@ class MetaReferenceEvalImpl(
|
|||
task_def = self.benchmarks[benchmark_id]
|
||||
dataset_id = task_def.dataset_id
|
||||
scoring_functions = task_def.scoring_functions
|
||||
dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
|
||||
validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.eval.value))
|
||||
all_rows = await self.datasetio_api.get_rows_paginated(
|
||||
|
||||
# TODO (xiyan): validate dataset schema
|
||||
# dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
|
||||
|
||||
all_rows = await self.datasetio_api.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=(-1 if benchmark_config.num_examples is None else benchmark_config.num_examples),
|
||||
limit=(-1 if benchmark_config.num_examples is None else benchmark_config.num_examples),
|
||||
)
|
||||
res = await self.evaluate_rows(
|
||||
benchmark_id=benchmark_id,
|
||||
input_rows=all_rows.rows,
|
||||
input_rows=all_rows.data,
|
||||
scoring_functions=scoring_functions,
|
||||
benchmark_config=benchmark_config,
|
||||
)
|
||||
|
|
|
@ -328,13 +328,13 @@ class LoraFinetuningSingleDevice:
|
|||
batch_size: int,
|
||||
) -> Tuple[DistributedSampler, DataLoader]:
|
||||
async def fetch_rows(dataset_id: str):
|
||||
return await self.datasetio_api.get_rows_paginated(
|
||||
return await self.datasetio_api.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
limit=-1,
|
||||
)
|
||||
|
||||
all_rows = await fetch_rows(dataset_id)
|
||||
rows = all_rows.rows
|
||||
rows = all_rows.data
|
||||
|
||||
await validate_input_dataset_schema(
|
||||
datasets_api=self.datasets_api,
|
||||
|
|
|
@ -24,7 +24,9 @@ from llama_stack.providers.utils.common.data_schema_validator import (
|
|||
from .config import BasicScoringConfig
|
||||
from .scoring_fn.bfcl_scoring_fn import BFCLScoringFn
|
||||
from .scoring_fn.equality_scoring_fn import EqualityScoringFn
|
||||
from .scoring_fn.regex_parser_math_response_scoring_fn import RegexParserMathResponseScoringFn
|
||||
from .scoring_fn.regex_parser_math_response_scoring_fn import (
|
||||
RegexParserMathResponseScoringFn,
|
||||
)
|
||||
from .scoring_fn.regex_parser_scoring_fn import RegexParserScoringFn
|
||||
from .scoring_fn.subset_of_scoring_fn import SubsetOfScoringFn
|
||||
|
||||
|
@ -82,12 +84,12 @@ class BasicScoringImpl(
|
|||
dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
|
||||
validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value))
|
||||
|
||||
all_rows = await self.datasetio_api.get_rows_paginated(
|
||||
all_rows = await self.datasetio_api.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
limit=-1,
|
||||
)
|
||||
res = await self.score(
|
||||
input_rows=all_rows.rows,
|
||||
input_rows=all_rows.data,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
if save_results_dataset:
|
||||
|
|
|
@ -167,11 +167,11 @@ class BraintrustScoringImpl(
|
|||
dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
|
||||
validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value))
|
||||
|
||||
all_rows = await self.datasetio_api.get_rows_paginated(
|
||||
all_rows = await self.datasetio_api.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
limit=-1,
|
||||
)
|
||||
res = await self.score(input_rows=all_rows.rows, scoring_functions=scoring_functions)
|
||||
res = await self.score(input_rows=all_rows.data, scoring_functions=scoring_functions)
|
||||
if save_results_dataset:
|
||||
# TODO: persist and register dataset on to server for reading
|
||||
# self.datasets_api.register_dataset()
|
||||
|
|
|
@ -72,12 +72,12 @@ class LlmAsJudgeScoringImpl(
|
|||
dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id)
|
||||
validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.scoring.value))
|
||||
|
||||
all_rows = await self.datasetio_api.get_rows_paginated(
|
||||
all_rows = await self.datasetio_api.iterrows(
|
||||
dataset_id=dataset_id,
|
||||
rows_in_page=-1,
|
||||
limit=-1,
|
||||
)
|
||||
res = await self.score(
|
||||
input_rows=all_rows.rows,
|
||||
input_rows=all_rows.data,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
if save_results_dataset:
|
||||
|
|
|
@ -4,13 +4,13 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import datasets as hf_datasets
|
||||
|
||||
from llama_stack.apis.datasetio import DatasetIO, PaginatedRowsResult
|
||||
from llama_stack.apis.datasetio import DatasetIO, IterrowsResponse
|
||||
from llama_stack.apis.datasets import Dataset
|
||||
from llama_stack.providers.datatypes import DatasetsProtocolPrivate
|
||||
from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_url
|
||||
from llama_stack.providers.utils.kvstore import kvstore_impl
|
||||
|
||||
from .config import HuggingfaceDatasetIOConfig
|
||||
|
@ -18,22 +18,14 @@ from .config import HuggingfaceDatasetIOConfig
|
|||
DATASETS_PREFIX = "datasets:"
|
||||
|
||||
|
||||
def load_hf_dataset(dataset_def: Dataset):
|
||||
if dataset_def.metadata.get("path", None):
|
||||
dataset = hf_datasets.load_dataset(**dataset_def.metadata)
|
||||
else:
|
||||
df = get_dataframe_from_url(dataset_def.url)
|
||||
def parse_hf_params(dataset_def: Dataset):
|
||||
uri = dataset_def.source.uri
|
||||
parsed_uri = urlparse(uri)
|
||||
params = parse_qs(parsed_uri.query)
|
||||
params = {k: v[0] for k, v in params.items()}
|
||||
path = parsed_uri.path.lstrip("/")
|
||||
|
||||
if df is None:
|
||||
raise ValueError(f"Failed to load dataset from {dataset_def.url}")
|
||||
|
||||
dataset = hf_datasets.Dataset.from_pandas(df)
|
||||
|
||||
# drop columns not specified by schema
|
||||
if dataset_def.dataset_schema:
|
||||
dataset = dataset.select_columns(list(dataset_def.dataset_schema.keys()))
|
||||
|
||||
return dataset
|
||||
return path, params
|
||||
|
||||
|
||||
class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
||||
|
@ -64,7 +56,7 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|||
key = f"{DATASETS_PREFIX}{dataset_def.identifier}"
|
||||
await self.kvstore.set(
|
||||
key=key,
|
||||
value=dataset_def.json(),
|
||||
value=dataset_def.model_dump_json(),
|
||||
)
|
||||
self.dataset_infos[dataset_def.identifier] = dataset_def
|
||||
|
||||
|
@ -73,41 +65,34 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
|
|||
await self.kvstore.delete(key=key)
|
||||
del self.dataset_infos[dataset_id]
|
||||
|
||||
async def get_rows_paginated(
|
||||
async def iterrows(
|
||||
self,
|
||||
dataset_id: str,
|
||||
rows_in_page: int,
|
||||
page_token: Optional[str] = None,
|
||||
filter_condition: Optional[str] = None,
|
||||
) -> PaginatedRowsResult:
|
||||
start_index: Optional[int] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> IterrowsResponse:
|
||||
dataset_def = self.dataset_infos[dataset_id]
|
||||
loaded_dataset = load_hf_dataset(dataset_def)
|
||||
path, params = parse_hf_params(dataset_def)
|
||||
loaded_dataset = hf_datasets.load_dataset(path, **params)
|
||||
|
||||
if page_token and not page_token.isnumeric():
|
||||
raise ValueError("Invalid page_token")
|
||||
start_index = start_index or 0
|
||||
|
||||
if page_token is None or len(page_token) == 0:
|
||||
next_page_token = 0
|
||||
else:
|
||||
next_page_token = int(page_token)
|
||||
|
||||
start = next_page_token
|
||||
if rows_in_page == -1:
|
||||
if limit is None or limit == -1:
|
||||
end = len(loaded_dataset)
|
||||
else:
|
||||
end = min(start + rows_in_page, len(loaded_dataset))
|
||||
end = min(start_index + limit, len(loaded_dataset))
|
||||
|
||||
rows = [loaded_dataset[i] for i in range(start, end)]
|
||||
rows = [loaded_dataset[i] for i in range(start_index, end)]
|
||||
|
||||
return PaginatedRowsResult(
|
||||
rows=rows,
|
||||
total_count=len(rows),
|
||||
next_page_token=str(end),
|
||||
return IterrowsResponse(
|
||||
data=rows,
|
||||
next_start_index=end if end < len(loaded_dataset) else None,
|
||||
)
|
||||
|
||||
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
||||
dataset_def = self.dataset_infos[dataset_id]
|
||||
loaded_dataset = load_hf_dataset(dataset_def)
|
||||
path, params = parse_hf_params(dataset_def)
|
||||
loaded_dataset = hf_datasets.load_dataset(path, **params)
|
||||
|
||||
# Convert rows to HF Dataset format
|
||||
new_dataset = hf_datasets.Dataset.from_list(rows)
|
||||
|
|
|
@ -10,18 +10,17 @@ from urllib.parse import unquote
|
|||
|
||||
import pandas
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
||||
|
||||
|
||||
def get_dataframe_from_url(url: URL):
|
||||
def get_dataframe_from_uri(uri: str):
|
||||
df = None
|
||||
if url.uri.endswith(".csv"):
|
||||
df = pandas.read_csv(url.uri)
|
||||
elif url.uri.endswith(".xlsx"):
|
||||
df = pandas.read_excel(url.uri)
|
||||
elif url.uri.startswith("data:"):
|
||||
parts = parse_data_url(url.uri)
|
||||
if uri.endswith(".csv"):
|
||||
df = pandas.read_csv(uri)
|
||||
elif uri.endswith(".xlsx"):
|
||||
df = pandas.read_excel(uri)
|
||||
elif uri.startswith("data:"):
|
||||
parts = parse_data_url(uri)
|
||||
data = parts["data"]
|
||||
if parts["is_base64"]:
|
||||
data = base64.b64decode(data)
|
||||
|
@ -39,6 +38,6 @@ def get_dataframe_from_url(url: URL):
|
|||
else:
|
||||
df = pandas.read_excel(data_bytes)
|
||||
else:
|
||||
raise ValueError(f"Unsupported file type: {url}")
|
||||
raise ValueError(f"Unsupported file type: {uri}")
|
||||
|
||||
return df
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
from llama_stack.apis.datasets import DatasetPurpose, URIDataSource
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import (
|
||||
BenchmarkInput,
|
||||
|
@ -171,76 +171,42 @@ def get_distribution_template() -> DistributionTemplate:
|
|||
DatasetInput(
|
||||
dataset_id="simpleqa",
|
||||
provider_id="huggingface",
|
||||
url=URL(uri="https://huggingface.co/datasets/llamastack/simpleqa"),
|
||||
metadata={
|
||||
"path": "llamastack/simpleqa",
|
||||
"split": "train",
|
||||
},
|
||||
dataset_schema={
|
||||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "string"},
|
||||
},
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/simpleqa?split=train",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="mmlu_cot",
|
||||
provider_id="huggingface",
|
||||
url=URL(uri="https://huggingface.co/datasets/llamastack/mmlu_cot"),
|
||||
metadata={
|
||||
"path": "llamastack/mmlu_cot",
|
||||
"name": "all",
|
||||
"split": "test",
|
||||
},
|
||||
dataset_schema={
|
||||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "string"},
|
||||
},
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="gpqa_cot",
|
||||
provider_id="huggingface",
|
||||
url=URL(uri="https://huggingface.co/datasets/llamastack/gpqa_0shot_cot"),
|
||||
metadata={
|
||||
"path": "llamastack/gpqa_0shot_cot",
|
||||
"name": "gpqa_main",
|
||||
"split": "train",
|
||||
},
|
||||
dataset_schema={
|
||||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "string"},
|
||||
},
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="math_500",
|
||||
provider_id="huggingface",
|
||||
url=URL(uri="https://huggingface.co/datasets/llamastack/math_500"),
|
||||
metadata={
|
||||
"path": "llamastack/math_500",
|
||||
"split": "test",
|
||||
},
|
||||
dataset_schema={
|
||||
"input_query": {"type": "string"},
|
||||
"expected_answer": {"type": "string"},
|
||||
"chat_completion_input": {"type": "string"},
|
||||
},
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/math_500?split=test",
|
||||
),
|
||||
),
|
||||
DatasetInput(
|
||||
dataset_id="bfcl",
|
||||
provider_id="huggingface",
|
||||
url=URL(uri="https://huggingface.co/datasets/llamastack/bfcl_v3"),
|
||||
metadata={
|
||||
"path": "llamastack/bfcl_v3",
|
||||
"split": "train",
|
||||
},
|
||||
dataset_schema={
|
||||
"function": {"type": "string"},
|
||||
"language": {"type": "string"},
|
||||
"ground_truth": {"type": "string"},
|
||||
"id": {"type": "string"},
|
||||
"chat_completion_input": {"type": "string"},
|
||||
},
|
||||
purpose=DatasetPurpose.eval_messages_answer,
|
||||
source=URIDataSource(
|
||||
uri="huggingface://datasets/llamastack/bfcl_v3?split=train",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -158,80 +158,39 @@ shields:
|
|||
- shield_id: meta-llama/Llama-Guard-3-8B
|
||||
vector_dbs: []
|
||||
datasets:
|
||||
- dataset_schema:
|
||||
input_query:
|
||||
type: string
|
||||
expected_answer:
|
||||
type: string
|
||||
chat_completion_input:
|
||||
type: string
|
||||
url:
|
||||
uri: https://huggingface.co/datasets/llamastack/simpleqa
|
||||
metadata:
|
||||
path: llamastack/simpleqa
|
||||
split: train
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/simpleqa?split=train
|
||||
metadata: {}
|
||||
dataset_id: simpleqa
|
||||
provider_id: huggingface
|
||||
- dataset_schema:
|
||||
input_query:
|
||||
type: string
|
||||
expected_answer:
|
||||
type: string
|
||||
chat_completion_input:
|
||||
type: string
|
||||
url:
|
||||
uri: https://huggingface.co/datasets/llamastack/mmlu_cot
|
||||
metadata:
|
||||
path: llamastack/mmlu_cot
|
||||
name: all
|
||||
split: test
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
|
||||
metadata: {}
|
||||
dataset_id: mmlu_cot
|
||||
provider_id: huggingface
|
||||
- dataset_schema:
|
||||
input_query:
|
||||
type: string
|
||||
expected_answer:
|
||||
type: string
|
||||
chat_completion_input:
|
||||
type: string
|
||||
url:
|
||||
uri: https://huggingface.co/datasets/llamastack/gpqa_0shot_cot
|
||||
metadata:
|
||||
path: llamastack/gpqa_0shot_cot
|
||||
name: gpqa_main
|
||||
split: train
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
|
||||
metadata: {}
|
||||
dataset_id: gpqa_cot
|
||||
provider_id: huggingface
|
||||
- dataset_schema:
|
||||
input_query:
|
||||
type: string
|
||||
expected_answer:
|
||||
type: string
|
||||
chat_completion_input:
|
||||
type: string
|
||||
url:
|
||||
uri: https://huggingface.co/datasets/llamastack/math_500
|
||||
metadata:
|
||||
path: llamastack/math_500
|
||||
split: test
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/math_500?split=test
|
||||
metadata: {}
|
||||
dataset_id: math_500
|
||||
provider_id: huggingface
|
||||
- dataset_schema:
|
||||
function:
|
||||
type: string
|
||||
language:
|
||||
type: string
|
||||
ground_truth:
|
||||
type: string
|
||||
id:
|
||||
type: string
|
||||
chat_completion_input:
|
||||
type: string
|
||||
url:
|
||||
uri: https://huggingface.co/datasets/llamastack/bfcl_v3
|
||||
metadata:
|
||||
path: llamastack/bfcl_v3
|
||||
split: train
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/bfcl_v3?split=train
|
||||
metadata: {}
|
||||
dataset_id: bfcl
|
||||
provider_id: huggingface
|
||||
scoring_fns: []
|
||||
|
|
|
@ -11,6 +11,7 @@ import jinja2
|
|||
import yaml
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.datasets import DatasetPurpose
|
||||
from llama_stack.apis.models.models import ModelType
|
||||
from llama_stack.distribution.datatypes import (
|
||||
Api,
|
||||
|
@ -214,7 +215,9 @@ class DistributionTemplate(BaseModel):
|
|||
|
||||
# Register YAML representer for ModelType
|
||||
yaml.add_representer(ModelType, enum_representer)
|
||||
yaml.add_representer(DatasetPurpose, enum_representer)
|
||||
yaml.SafeDumper.add_representer(ModelType, enum_representer)
|
||||
yaml.SafeDumper.add_representer(DatasetPurpose, enum_representer)
|
||||
|
||||
for output_dir in [yaml_output_dir, doc_output_dir]:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
@ -1,114 +0,0 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasetio
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def dataset_for_test(llama_stack_client):
|
||||
dataset_id = "test_dataset"
|
||||
register_dataset(llama_stack_client, dataset_id=dataset_id)
|
||||
yield
|
||||
# Teardown - this always runs, even if the test fails
|
||||
try:
|
||||
llama_stack_client.datasets.unregister(dataset_id)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to unregister test_dataset: {e}")
|
||||
|
||||
|
||||
def data_url_from_file(file_path: str) -> str:
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
with open(file_path, "rb") as file:
|
||||
file_content = file.read()
|
||||
|
||||
base64_content = base64.b64encode(file_content).decode("utf-8")
|
||||
mime_type, _ = mimetypes.guess_type(file_path)
|
||||
|
||||
data_url = f"data:{mime_type};base64,{base64_content}"
|
||||
|
||||
return data_url
|
||||
|
||||
|
||||
def register_dataset(llama_stack_client, for_generation=False, for_rag=False, dataset_id="test_dataset"):
|
||||
if for_rag:
|
||||
test_file = Path(os.path.abspath(__file__)).parent / "test_rag_dataset.csv"
|
||||
else:
|
||||
test_file = Path(os.path.abspath(__file__)).parent / "test_dataset.csv"
|
||||
test_url = data_url_from_file(str(test_file))
|
||||
|
||||
if for_generation:
|
||||
dataset_schema = {
|
||||
"expected_answer": {"type": "string"},
|
||||
"input_query": {"type": "string"},
|
||||
"chat_completion_input": {"type": "chat_completion_input"},
|
||||
}
|
||||
elif for_rag:
|
||||
dataset_schema = {
|
||||
"expected_answer": {"type": "string"},
|
||||
"input_query": {"type": "string"},
|
||||
"generated_answer": {"type": "string"},
|
||||
"context": {"type": "string"},
|
||||
}
|
||||
else:
|
||||
dataset_schema = {
|
||||
"expected_answer": {"type": "string"},
|
||||
"input_query": {"type": "string"},
|
||||
"generated_answer": {"type": "string"},
|
||||
}
|
||||
|
||||
dataset_providers = [x for x in llama_stack_client.providers.list() if x.api == "datasetio"]
|
||||
dataset_provider_id = dataset_providers[0].provider_id
|
||||
|
||||
llama_stack_client.datasets.register(
|
||||
dataset_id=dataset_id,
|
||||
dataset_schema=dataset_schema,
|
||||
url=dict(uri=test_url),
|
||||
provider_id=dataset_provider_id,
|
||||
)
|
||||
|
||||
|
||||
def test_register_unregister_dataset(llama_stack_client):
|
||||
register_dataset(llama_stack_client)
|
||||
response = llama_stack_client.datasets.list()
|
||||
assert isinstance(response, list)
|
||||
assert len(response) == 1
|
||||
assert response[0].identifier == "test_dataset"
|
||||
|
||||
llama_stack_client.datasets.unregister("test_dataset")
|
||||
response = llama_stack_client.datasets.list()
|
||||
assert isinstance(response, list)
|
||||
assert len(response) == 0
|
||||
|
||||
|
||||
def test_get_rows_paginated(llama_stack_client, dataset_for_test):
|
||||
response = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset",
|
||||
rows_in_page=3,
|
||||
)
|
||||
assert isinstance(response.rows, list)
|
||||
assert len(response.rows) == 3
|
||||
assert response.next_page_token == "3"
|
||||
|
||||
# iterate over all rows
|
||||
response = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset",
|
||||
rows_in_page=2,
|
||||
page_token=response.next_page_token,
|
||||
)
|
||||
assert isinstance(response.rows, list)
|
||||
assert len(response.rows) == 2
|
||||
assert response.next_page_token == "5"
|
95
tests/integration/datasets/test_datasets.py
Normal file
95
tests/integration/datasets/test_datasets.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasets
|
||||
|
||||
|
||||
def data_url_from_file(file_path: str) -> str:
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
with open(file_path, "rb") as file:
|
||||
file_content = file.read()
|
||||
|
||||
base64_content = base64.b64encode(file_content).decode("utf-8")
|
||||
mime_type, _ = mimetypes.guess_type(file_path)
|
||||
|
||||
data_url = f"data:{mime_type};base64,{base64_content}"
|
||||
|
||||
return data_url
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"purpose, source, provider_id, limit",
|
||||
[
|
||||
(
|
||||
"eval/messages-answer",
|
||||
{
|
||||
"type": "uri",
|
||||
"uri": "huggingface://datasets/llamastack/simpleqa?split=train",
|
||||
},
|
||||
"huggingface",
|
||||
10,
|
||||
),
|
||||
(
|
||||
"eval/messages-answer",
|
||||
{
|
||||
"type": "rows",
|
||||
"rows": [
|
||||
{
|
||||
"messages": [{"role": "user", "content": "Hello, world!"}],
|
||||
"answer": "Hello, world!",
|
||||
},
|
||||
{
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the capital of France?",
|
||||
}
|
||||
],
|
||||
"answer": "Paris",
|
||||
},
|
||||
],
|
||||
},
|
||||
"localfs",
|
||||
2,
|
||||
),
|
||||
(
|
||||
"eval/messages-answer",
|
||||
{
|
||||
"type": "uri",
|
||||
"uri": data_url_from_file(os.path.join(os.path.dirname(__file__), "test_dataset.csv")),
|
||||
},
|
||||
"localfs",
|
||||
5,
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
|
||||
dataset = llama_stack_client.datasets.register(
|
||||
purpose=purpose,
|
||||
source=source,
|
||||
)
|
||||
assert dataset.identifier is not None
|
||||
assert dataset.provider_id == provider_id
|
||||
iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
|
||||
assert len(iterrow_response.data) == limit
|
||||
|
||||
dataset_list = llama_stack_client.datasets.list()
|
||||
assert dataset.identifier in [d.identifier for d in dataset_list]
|
||||
|
||||
llama_stack_client.datasets.unregister(dataset.identifier)
|
||||
dataset_list = llama_stack_client.datasets.list()
|
||||
assert dataset.identifier not in [d.identifier for d in dataset_list]
|
|
@ -4,10 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from ..datasetio.test_datasetio import register_dataset
|
||||
from ..datasets.test_datasets import data_url_from_file
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
|
@ -16,15 +17,21 @@ from ..datasetio.test_datasetio import register_dataset
|
|||
|
||||
@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
|
||||
def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
||||
register_dataset(llama_stack_client, for_generation=True, dataset_id="test_dataset_for_eval")
|
||||
response = llama_stack_client.datasets.list()
|
||||
assert any(x.identifier == "test_dataset_for_eval" for x in response)
|
||||
|
||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset_for_eval",
|
||||
rows_in_page=3,
|
||||
dataset = llama_stack_client.datasets.register(
|
||||
purpose="eval/messages-answer",
|
||||
source={
|
||||
"type": "uri",
|
||||
"uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
|
||||
},
|
||||
)
|
||||
assert len(rows.rows) == 3
|
||||
response = llama_stack_client.datasets.list()
|
||||
assert any(x.identifier == dataset.identifier for x in response)
|
||||
|
||||
rows = llama_stack_client.datasets.iterrows(
|
||||
dataset_id=dataset.identifier,
|
||||
limit=3,
|
||||
)
|
||||
assert len(rows.data) == 3
|
||||
|
||||
scoring_functions = [
|
||||
scoring_fn_id,
|
||||
|
@ -32,7 +39,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
|||
benchmark_id = str(uuid.uuid4())
|
||||
llama_stack_client.benchmarks.register(
|
||||
benchmark_id=benchmark_id,
|
||||
dataset_id="test_dataset_for_eval",
|
||||
dataset_id=dataset.identifier,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
list_benchmarks = llama_stack_client.benchmarks.list()
|
||||
|
@ -40,7 +47,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
|||
|
||||
response = llama_stack_client.eval.evaluate_rows(
|
||||
benchmark_id=benchmark_id,
|
||||
input_rows=rows.rows,
|
||||
input_rows=rows.data,
|
||||
scoring_functions=scoring_functions,
|
||||
benchmark_config={
|
||||
"eval_candidate": {
|
||||
|
@ -59,11 +66,17 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
|||
|
||||
@pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
|
||||
def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
|
||||
register_dataset(llama_stack_client, for_generation=True, dataset_id="test_dataset_for_eval_2")
|
||||
dataset = llama_stack_client.datasets.register(
|
||||
purpose="eval/messages-answer",
|
||||
source={
|
||||
"type": "uri",
|
||||
"uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
|
||||
},
|
||||
)
|
||||
benchmark_id = str(uuid.uuid4())
|
||||
llama_stack_client.benchmarks.register(
|
||||
benchmark_id=benchmark_id,
|
||||
dataset_id="test_dataset_for_eval_2",
|
||||
dataset_id=dataset.identifier,
|
||||
scoring_functions=[scoring_fn_id],
|
||||
)
|
||||
|
||||
|
|
|
@ -5,23 +5,11 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from ..datasetio.test_datasetio import register_dataset
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def rag_dataset_for_test(llama_stack_client):
|
||||
dataset_id = "test_dataset"
|
||||
register_dataset(llama_stack_client, for_rag=True, dataset_id=dataset_id)
|
||||
yield # This is where the test function will run
|
||||
|
||||
# Teardown - this always runs, even if the test fails
|
||||
try:
|
||||
llama_stack_client.datasets.unregister(dataset_id)
|
||||
except Exception as e:
|
||||
print(f"Warning: Failed to unregister test_dataset: {e}")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_judge_prompt_template():
|
||||
|
@ -92,49 +80,34 @@ def test_scoring_functions_register(
|
|||
# TODO: add unregister api for scoring functions
|
||||
|
||||
|
||||
def test_scoring_score(llama_stack_client, rag_dataset_for_test):
|
||||
@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
|
||||
def test_scoring_score(llama_stack_client, scoring_fn_id):
|
||||
# scoring individual rows
|
||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset",
|
||||
rows_in_page=3,
|
||||
)
|
||||
assert len(rows.rows) == 3
|
||||
df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
|
||||
rows = df.to_dict(orient="records")
|
||||
|
||||
scoring_fns_list = llama_stack_client.scoring_functions.list()
|
||||
scoring_functions = {
|
||||
scoring_fns_list[0].identifier: None,
|
||||
scoring_fn_id: None,
|
||||
}
|
||||
|
||||
response = llama_stack_client.scoring.score(
|
||||
input_rows=rows.rows,
|
||||
input_rows=rows,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
assert len(response.results) == len(scoring_functions)
|
||||
for x in scoring_functions:
|
||||
assert x in response.results
|
||||
assert len(response.results[x].score_rows) == len(rows.rows)
|
||||
|
||||
# score batch
|
||||
response = llama_stack_client.scoring.score_batch(
|
||||
dataset_id="test_dataset",
|
||||
scoring_functions=scoring_functions,
|
||||
save_results_dataset=False,
|
||||
)
|
||||
assert len(response.results) == len(scoring_functions)
|
||||
for x in scoring_functions:
|
||||
assert x in response.results
|
||||
assert len(response.results[x].score_rows) == 5
|
||||
assert len(response.results[x].score_rows) == len(rows)
|
||||
|
||||
|
||||
def test_scoring_score_with_params_llm_as_judge(
|
||||
llama_stack_client, sample_judge_prompt_template, judge_model_id, rag_dataset_for_test
|
||||
llama_stack_client,
|
||||
sample_judge_prompt_template,
|
||||
judge_model_id,
|
||||
):
|
||||
# scoring individual rows
|
||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset",
|
||||
rows_in_page=3,
|
||||
)
|
||||
assert len(rows.rows) == 3
|
||||
df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
|
||||
rows = df.to_dict(orient="records")
|
||||
|
||||
scoring_functions = {
|
||||
"llm-as-judge::base": dict(
|
||||
|
@ -149,24 +122,13 @@ def test_scoring_score_with_params_llm_as_judge(
|
|||
}
|
||||
|
||||
response = llama_stack_client.scoring.score(
|
||||
input_rows=rows.rows,
|
||||
input_rows=rows,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
assert len(response.results) == len(scoring_functions)
|
||||
for x in scoring_functions:
|
||||
assert x in response.results
|
||||
assert len(response.results[x].score_rows) == len(rows.rows)
|
||||
|
||||
# score batch
|
||||
response = llama_stack_client.scoring.score_batch(
|
||||
dataset_id="test_dataset",
|
||||
scoring_functions=scoring_functions,
|
||||
save_results_dataset=False,
|
||||
)
|
||||
assert len(response.results) == len(scoring_functions)
|
||||
for x in scoring_functions:
|
||||
assert x in response.results
|
||||
assert len(response.results[x].score_rows) == 5
|
||||
assert len(response.results[x].score_rows) == len(rows)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
@ -178,13 +140,14 @@ def test_scoring_score_with_params_llm_as_judge(
|
|||
],
|
||||
)
|
||||
def test_scoring_score_with_aggregation_functions(
|
||||
llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id, rag_dataset_for_test
|
||||
llama_stack_client,
|
||||
sample_judge_prompt_template,
|
||||
judge_model_id,
|
||||
provider_id,
|
||||
rag_dataset_for_test,
|
||||
):
|
||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||
dataset_id="test_dataset",
|
||||
rows_in_page=3,
|
||||
)
|
||||
assert len(rows.rows) == 3
|
||||
df = pd.read_csv(Path(__file__).parent.parent / "datasets" / "test_dataset.csv")
|
||||
rows = df.to_dict(orient="records")
|
||||
|
||||
scoring_fns_list = [x for x in llama_stack_client.scoring_functions.list() if x.provider_id == provider_id]
|
||||
if len(scoring_fns_list) == 0:
|
||||
|
@ -224,12 +187,12 @@ def test_scoring_score_with_aggregation_functions(
|
|||
scoring_functions[scoring_fn.identifier] = None
|
||||
|
||||
response = llama_stack_client.scoring.score(
|
||||
input_rows=rows.rows,
|
||||
input_rows=rows,
|
||||
scoring_functions=scoring_functions,
|
||||
)
|
||||
|
||||
assert len(response.results) == len(scoring_functions)
|
||||
for x in scoring_functions:
|
||||
assert x in response.results
|
||||
assert len(response.results[x].score_rows) == len(rows.rows)
|
||||
assert len(response.results[x].score_rows) == len(rows)
|
||||
assert len(response.results[x].aggregated_results) == len(aggr_fns)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue