diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index f9f56119b..dbfc90452 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -49,6 +49,7 @@ from llama_stack.apis.models import * # noqa: F403
from llama_stack.apis.memory_banks import * # noqa: F403
from llama_stack.apis.shields import * # noqa: F403
from llama_stack.apis.inspect import * # noqa: F403
+from llama_stack.apis.eval_tasks import * # noqa: F403
class LlamaStack(
@@ -63,6 +64,7 @@ class LlamaStack(
PostTraining,
Memory,
Eval,
+ EvalTasks,
Scoring,
ScoringFunctions,
DatasetIO,
diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index b1b1504ee..5a26c3ef4 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-07 22:26:27.169134"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-07 22:57:18.550543"
},
"servers": [
{
@@ -726,6 +726,51 @@
]
}
},
+ "/eval_tasks/get": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/EvalTaskDefWithProvider"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "EvalTasks"
+ ],
+ "parameters": [
+ {
+ "name": "name",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/memory_banks/get": {
"get": {
"responses": {
@@ -1388,6 +1433,36 @@
]
}
},
+ "/eval_tasks/list": {
+ "get": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/jsonl": {
+ "schema": {
+ "$ref": "#/components/schemas/EvalTaskDefWithProvider"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "EvalTasks"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ]
+ }
+ },
"/memory_banks/list": {
"get": {
"responses": {
@@ -1736,6 +1811,39 @@
}
}
},
+ "/eval_tasks/register": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "EvalTasks"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RegisterEvalTaskRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/memory_banks/register": {
"post": {
"responses": {
@@ -5199,6 +5307,65 @@
"provider_id"
]
},
+ "EvalTaskDefWithProvider": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "string"
+ },
+ "dataset_id": {
+ "type": "string"
+ },
+ "scoring_functions": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ },
+ "type": {
+ "type": "string",
+ "const": "eval_task",
+ "default": "eval_task"
+ },
+ "provider_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "identifier",
+ "dataset_id",
+ "scoring_functions",
+ "metadata",
+ "type",
+ "provider_id"
+ ]
+ },
"ModelDefWithProvider": {
"type": "object",
"properties": {
@@ -6459,6 +6626,18 @@
"dataset_def"
]
},
+ "RegisterEvalTaskRequest": {
+ "type": "object",
+ "properties": {
+ "eval_task_def": {
+ "$ref": "#/components/schemas/EvalTaskDefWithProvider"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "eval_task_def"
+ ]
+ },
"RegisterMemoryBankRequest": {
"type": "object",
"properties": {
@@ -7071,55 +7250,58 @@
],
"tags": [
{
- "name": "ScoringFunctions"
- },
- {
- "name": "Agents"
- },
- {
- "name": "Shields"
- },
- {
- "name": "Telemetry"
- },
- {
- "name": "Safety"
+ "name": "Inspect"
},
{
"name": "MemoryBanks"
},
{
- "name": "Models"
+ "name": "Safety"
},
{
"name": "BatchInference"
},
{
- "name": "PostTraining"
- },
- {
- "name": "Inference"
- },
- {
- "name": "Datasets"
+ "name": "Telemetry"
},
{
"name": "DatasetIO"
},
{
- "name": "SyntheticDataGeneration"
+ "name": "Models"
},
{
- "name": "Memory"
+ "name": "Shields"
+ },
+ {
+ "name": "Scoring"
+ },
+ {
+ "name": "Datasets"
+ },
+ {
+ "name": "Agents"
+ },
+ {
+ "name": "PostTraining"
},
{
"name": "Eval"
},
{
- "name": "Inspect"
+ "name": "ScoringFunctions"
},
{
- "name": "Scoring"
+ "name": "EvalTasks"
+ },
+ {
+ "name": "Memory"
+ },
+ {
+ "name": "SyntheticDataGeneration"
+ },
+ {
+ "name": "Inference"
},
{
"name": "BuiltinTool",
@@ -7449,6 +7631,10 @@
"name": "DatasetDefWithProvider",
"description": ""
},
+ {
+ "name": "EvalTaskDefWithProvider",
+ "description": ""
+ },
{
"name": "ModelDefWithProvider",
"description": ""
@@ -7585,6 +7771,10 @@
"name": "RegisterDatasetRequest",
"description": ""
},
+ {
+ "name": "RegisterEvalTaskRequest",
+ "description": ""
+ },
{
"name": "RegisterMemoryBankRequest",
"description": ""
@@ -7671,6 +7861,7 @@
"DatasetIO",
"Datasets",
"Eval",
+ "EvalTasks",
"Inference",
"Inspect",
"Memory",
@@ -7730,6 +7921,7 @@
"DoraFinetuningConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
+ "EvalTaskDefWithProvider",
"EvaluateResponse",
"EvaluateRowsRequest",
"FinetuningAlgorithm",
@@ -7771,6 +7963,7 @@
"RLHFAlgorithm",
"RegexParserScoringFnParams",
"RegisterDatasetRequest",
+ "RegisterEvalTaskRequest",
"RegisterMemoryBankRequest",
"RegisterModelRequest",
"RegisterScoringFunctionRequest",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index f839e7bc0..d165cedcf 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -918,6 +918,41 @@ components:
required:
- embeddings
type: object
+ EvalTaskDefWithProvider:
+ additionalProperties: false
+ properties:
+ dataset_id:
+ type: string
+ identifier:
+ type: string
+ metadata:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ provider_id:
+ type: string
+ scoring_functions:
+ items:
+ type: string
+ type: array
+ type:
+ const: eval_task
+ default: eval_task
+ type: string
+ required:
+ - identifier
+ - dataset_id
+ - scoring_functions
+ - metadata
+ - type
+ - provider_id
+ type: object
EvaluateResponse:
additionalProperties: false
properties:
@@ -1821,6 +1856,14 @@ components:
required:
- dataset_def
type: object
+ RegisterEvalTaskRequest:
+ additionalProperties: false
+ properties:
+ eval_task_def:
+ $ref: '#/components/schemas/EvalTaskDefWithProvider'
+ required:
+ - eval_task_def
+ type: object
RegisterMemoryBankRequest:
additionalProperties: false
properties:
@@ -2983,7 +3026,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-11-07 22:26:27.169134"
+ \ draft and subject to change.\n Generated at 2024-11-07 22:57:18.550543"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3503,6 +3546,72 @@ paths:
description: OK
tags:
- Eval
+ /eval_tasks/get:
+ get:
+ parameters:
+ - in: query
+ name: name
+ required: true
+ schema:
+ type: string
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/EvalTaskDefWithProvider'
+ - type: 'null'
+ description: OK
+ tags:
+ - EvalTasks
+ /eval_tasks/list:
+ get:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ responses:
+ '200':
+ content:
+ application/jsonl:
+ schema:
+ $ref: '#/components/schemas/EvalTaskDefWithProvider'
+ description: OK
+ tags:
+ - EvalTasks
+ /eval_tasks/register:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RegisterEvalTaskRequest'
+ required: true
+ responses:
+ '200':
+ description: OK
+ tags:
+ - EvalTasks
/health:
get:
parameters:
@@ -4275,23 +4384,24 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: ScoringFunctions
-- name: Agents
-- name: Shields
-- name: Telemetry
-- name: Safety
-- name: MemoryBanks
-- name: Models
-- name: BatchInference
-- name: PostTraining
-- name: Inference
-- name: Datasets
-- name: DatasetIO
-- name: SyntheticDataGeneration
-- name: Memory
-- name: Eval
- name: Inspect
+- name: MemoryBanks
+- name: Safety
+- name: BatchInference
+- name: Telemetry
+- name: DatasetIO
+- name: Models
+- name: Shields
- name: Scoring
+- name: Datasets
+- name: Agents
+- name: PostTraining
+- name: Eval
+- name: ScoringFunctions
+- name: EvalTasks
+- name: Memory
+- name: SyntheticDataGeneration
+- name: Inference
- description:
name: BuiltinTool
- description:
name: DatasetDefWithProvider
+- description:
+ name: EvalTaskDefWithProvider
- description:
name: ModelDefWithProvider
@@ -4647,6 +4760,9 @@ tags:
- description:
name: RegisterDatasetRequest
+- description:
+ name: RegisterEvalTaskRequest
- description:
name: RegisterMemoryBankRequest
@@ -4712,6 +4828,7 @@ x-tagGroups:
- DatasetIO
- Datasets
- Eval
+ - EvalTasks
- Inference
- Inspect
- Memory
@@ -4768,6 +4885,7 @@ x-tagGroups:
- DoraFinetuningConfig
- EmbeddingsRequest
- EmbeddingsResponse
+ - EvalTaskDefWithProvider
- EvaluateResponse
- EvaluateRowsRequest
- FinetuningAlgorithm
@@ -4809,6 +4927,7 @@ x-tagGroups:
- RLHFAlgorithm
- RegexParserScoringFnParams
- RegisterDatasetRequest
+ - RegisterEvalTaskRequest
- RegisterMemoryBankRequest
- RegisterModelRequest
- RegisterScoringFunctionRequest