diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 7df0c901e..6d545e7c1 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -2293,67 +2293,6 @@
]
}
},
- "/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
- "get": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/IterrowsResponse"
- }
- }
- }
- },
- "400": {
- "$ref": "#/components/responses/BadRequest400"
- },
- "429": {
- "$ref": "#/components/responses/TooManyRequests429"
- },
- "500": {
- "$ref": "#/components/responses/InternalServerError500"
- },
- "default": {
- "$ref": "#/components/responses/DefaultError"
- }
- },
- "tags": [
- "DatasetIO"
- ],
- "description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.",
- "parameters": [
- {
- "name": "dataset_id",
- "in": "path",
- "description": "The ID of the dataset to get the rows from.",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "start_index",
- "in": "query",
- "description": "Index into dataset for the first row to get. Get all rows if None.",
- "required": false,
- "schema": {
- "type": "integer"
- }
- },
- {
- "name": "limit",
- "in": "query",
- "description": "The number of rows to get per page.",
- "required": false,
- "schema": {
- "type": "integer"
- }
- }
- ]
- }
- },
"/v1/agents/{agent_id}/sessions": {
"get": {
"responses": {
@@ -6613,69 +6552,77 @@
"const": "factuality",
"default": "factuality"
},
- "dataset_id": {
- "type": "string"
- },
- "scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
- }
- },
- "metadata": {
+ "factuality": {
"type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
}
- ]
- }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "aggregation_functions"
+ ],
+ "title": "BasicGraderParams"
}
},
"additionalProperties": false,
"required": [
- "identifier",
- "provider_resource_id",
- "provider_id",
"type",
- "dataset_id",
- "scoring_functions",
- "metadata"
+ "factuality"
],
- "title": "Benchmark"
+ "title": "FactualityGrader"
},
- "DataSource": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/URIDataSource"
+ "FaithfulnessGrader": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "faithfulness",
+ "default": "faithfulness"
},
- {
- "$ref": "#/components/schemas/RowsDataSource"
+ "faithfulness": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "aggregation_functions"
+ ],
+ "title": "BasicGraderParams"
}
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "faithfulness"
],
- "discriminator": {
- "propertyName": "type",
- "mapping": {
- "uri": "#/components/schemas/URIDataSource",
- "rows": "#/components/schemas/RowsDataSource"
- }
- }
+ "title": "FaithfulnessGrader"
},
"Grader": {
"type": "object",
@@ -6694,18 +6641,11 @@
"const": "grader",
"default": "grader"
},
- "purpose": {
- "type": "string",
- "enum": [
- "post-training/messages",
- "eval/question-answer",
- "eval/messages-answer"
- ],
- "title": "DatasetPurpose",
- "description": "Purpose of the dataset. Each purpose has a required input data schema."
+ "grader": {
+ "$ref": "#/components/schemas/GraderDefinition"
},
- "source": {
- "$ref": "#/components/schemas/DataSource"
+ "description": {
+ "type": "string"
},
"metadata": {
"type": "object",
@@ -6739,78 +6679,98 @@
"provider_resource_id",
"provider_id",
"type",
- "purpose",
- "source",
+ "grader",
"metadata"
],
- "title": "Dataset"
+ "title": "Grader"
},
- "RowsDataSource": {
+ "GraderDefinition": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LlmGrader"
+ },
+ {
+ "$ref": "#/components/schemas/RegexParserGrader"
+ },
+ {
+ "$ref": "#/components/schemas/EqualityGrader"
+ },
+ {
+ "$ref": "#/components/schemas/SubsetOfGrader"
+ },
+ {
+ "$ref": "#/components/schemas/FactualityGrader"
+ },
+ {
+ "$ref": "#/components/schemas/FaithfulnessGrader"
+ }
+ ],
+ "discriminator": {
+ "propertyName": "type",
+ "mapping": {
+ "llm": "#/components/schemas/LlmGrader",
+ "regex_parser": "#/components/schemas/RegexParserGrader",
+ "equality": "#/components/schemas/EqualityGrader",
+ "subset_of": "#/components/schemas/SubsetOfGrader",
+ "factuality": "#/components/schemas/FactualityGrader",
+ "faithfulness": "#/components/schemas/FaithfulnessGrader"
+ }
+ }
+ },
+ "LlmGrader": {
"type": "object",
"properties": {
"type": {
"type": "string",
- "const": "rows",
- "default": "rows"
+ "const": "llm",
+ "default": "llm"
},
- "rows": {
- "type": "array",
- "items": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
+ "llm": {
+ "type": "object",
+ "properties": {
+ "model": {
+ "type": "string"
+ },
+ "prompt": {
+ "type": "string"
+ },
+ "score_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ }
}
},
- "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]"
+ "additionalProperties": false,
+ "required": [
+ "model",
+ "prompt",
+ "score_regexes",
+ "aggregation_functions"
+ ],
+ "title": "LlmGraderParams"
}
},
"additionalProperties": false,
"required": [
"type",
- "rows"
+ "llm"
],
- "title": "RowsDataSource",
- "description": "A dataset stored in rows."
- },
- "URIDataSource": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "uri",
- "default": "uri"
- },
- "uri": {
- "type": "string",
- "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\""
- }
- },
- "additionalProperties": false,
- "required": [
- "type",
- "uri"
- ],
- "title": "URIDataSource",
- "description": "A dataset that can be obtained from a URI."
+ "title": "LlmGrader"
},
"RegexParserGrader": {
"type": "object",
@@ -6859,182 +6819,45 @@
],
"title": "RegexParserGrader"
},
- "ModelType": {
- "type": "string",
- "enum": [
- "llm",
- "embedding"
- ],
- "title": "ModelType"
- },
- "AgentTurnInputType": {
+ "SubsetOfGrader": {
"type": "object",
"properties": {
"type": {
"type": "string",
- "const": "agent_turn_input",
- "default": "agent_turn_input"
+ "const": "subset_of",
+ "default": "subset_of"
+ },
+ "subset_of": {
+ "type": "object",
+ "properties": {
+ "aggregation_functions": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "average",
+ "median",
+ "categorical_count",
+ "accuracy"
+ ],
+ "title": "AggregationFunctionType",
+ "description": "A type of aggregation function."
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "aggregation_functions"
+ ],
+ "title": "BasicGraderParams"
}
},
"additionalProperties": false,
"required": [
- "type"
+ "type",
+ "subset_of"
],
- "title": "AgentTurnInputType"
- },
- "ArrayType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "array",
- "default": "array"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "ArrayType"
- },
- "BooleanType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "boolean",
- "default": "boolean"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "BooleanType"
- },
- "ChatCompletionInputType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "chat_completion_input",
- "default": "chat_completion_input"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "ChatCompletionInputType"
- },
- "CompletionInputType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "completion_input",
- "default": "completion_input"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "CompletionInputType"
- },
- "JsonType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "json",
- "default": "json"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "JsonType"
- },
- "NumberType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "number",
- "default": "number"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "NumberType"
- },
- "ObjectType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "object",
- "default": "object"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "ObjectType"
- },
- "ParamType": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/StringType"
- },
- {
- "$ref": "#/components/schemas/NumberType"
- },
- {
- "$ref": "#/components/schemas/BooleanType"
- },
- {
- "$ref": "#/components/schemas/ArrayType"
- },
- {
- "$ref": "#/components/schemas/ObjectType"
- },
- {
- "$ref": "#/components/schemas/JsonType"
- },
- {
- "$ref": "#/components/schemas/UnionType"
- },
- {
- "$ref": "#/components/schemas/ChatCompletionInputType"
- },
- {
- "$ref": "#/components/schemas/CompletionInputType"
- },
- {
- "$ref": "#/components/schemas/AgentTurnInputType"
- }
- ],
- "discriminator": {
- "propertyName": "type",
- "mapping": {
- "string": "#/components/schemas/StringType",
- "number": "#/components/schemas/NumberType",
- "boolean": "#/components/schemas/BooleanType",
- "array": "#/components/schemas/ArrayType",
- "object": "#/components/schemas/ObjectType",
- "json": "#/components/schemas/JsonType",
- "union": "#/components/schemas/UnionType",
- "chat_completion_input": "#/components/schemas/ChatCompletionInputType",
- "completion_input": "#/components/schemas/CompletionInputType",
- "agent_turn_input": "#/components/schemas/AgentTurnInputType"
- }
- }
+ "title": "SubsetOfGrader"
},
"Model": {
"type": "object",
@@ -7090,39 +6913,17 @@
"provider_id",
"type",
"metadata",
- "return_type"
+ "model_type"
],
- "title": "ScoringFn"
+ "title": "Model"
},
- "StringType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "string",
- "default": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
+ "ModelType": {
+ "type": "string",
+ "enum": [
+ "llm",
+ "embedding"
],
- "title": "StringType"
- },
- "UnionType": {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "union",
- "default": "union"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ],
- "title": "UnionType"
+ "title": "ModelType"
},
"Shield": {
"type": "object",
@@ -9677,6 +9478,50 @@
"purpose",
"source"
],
+ "title": "RegisterDatasetRequest"
+ },
+ "RegisterGraderRequest": {
+ "type": "object",
+ "properties": {
+ "grader": {
+ "$ref": "#/components/schemas/GraderDefinition",
+ "description": "The grader definition, E.g. - { \"type\": \"llm\", \"llm\": { \"model\": \"llama-405b\", \"prompt\": \"You are a judge. Score the answer based on the question. {question} {answer}\", } }"
+ },
+ "grader_id": {
+ "type": "string",
+ "description": "(Optional) The ID of the grader. If not provided, a random ID will be generated."
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ },
+ "description": "(Optional) Any additional metadata for this grader. - E.g. { \"description\": \"A grader that scores the answer based on the question.\", }"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "grader"
+ ],
"title": "RegisterGraderRequest"
},
"RegisterModelRequest": {
@@ -10354,6 +10199,9 @@
{
"name": "Files"
},
+ {
+ "name": "Graders"
+ },
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@@ -10406,8 +10254,9 @@
"Benchmarks",
"DatasetIO",
"Datasets",
- "Eval",
+ "Evaluation",
"Files",
+ "Graders",
"Inference",
"Inspect",
"Models",
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 90b04b50a..41b11d9e0 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -1550,50 +1550,6 @@ paths:
required: false
schema:
type: integer
- /v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
- get:
- responses:
- '200':
- description: OK
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/IterrowsResponse'
- '400':
- $ref: '#/components/responses/BadRequest400'
- '429':
- $ref: >-
- #/components/responses/TooManyRequests429
- '500':
- $ref: >-
- #/components/responses/InternalServerError500
- default:
- $ref: '#/components/responses/DefaultError'
- tags:
- - DatasetIO
- description: >-
- Get a paginated list of rows from a dataset. Uses cursor-based pagination.
- parameters:
- - name: dataset_id
- in: path
- description: >-
- The ID of the dataset to get the rows from.
- required: true
- schema:
- type: string
- - name: start_index
- in: query
- description: >-
- Index into dataset for the first row to get. Get all rows if None.
- required: false
- schema:
- type: integer
- - name: limit
- in: query
- description: The number of rows to get per page.
- required: false
- schema:
- type: integer
/v1/agents/{agent_id}/sessions:
get:
responses:
@@ -4571,6 +4527,255 @@ components:
title: URIDataSource
description: >-
A dataset that can be obtained from a URI.
+ EqualityGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: equality
+ default: equality
+ equality:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - aggregation_functions
+ title: BasicGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - equality
+ title: EqualityGrader
+ FactualityGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: factuality
+ default: factuality
+ factuality:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - aggregation_functions
+ title: BasicGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - factuality
+ title: FactualityGrader
+ FaithfulnessGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: faithfulness
+ default: faithfulness
+ faithfulness:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - aggregation_functions
+ title: BasicGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - faithfulness
+ title: FaithfulnessGrader
+ Grader:
+ type: object
+ properties:
+ identifier:
+ type: string
+ provider_resource_id:
+ type: string
+ provider_id:
+ type: string
+ type:
+ type: string
+ const: grader
+ default: grader
+ grader:
+ $ref: '#/components/schemas/GraderDefinition'
+ description:
+ type: string
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ additionalProperties: false
+ required:
+ - identifier
+ - provider_resource_id
+ - provider_id
+ - type
+ - grader
+ - metadata
+ title: Grader
+ GraderDefinition:
+ oneOf:
+ - $ref: '#/components/schemas/LlmGrader'
+ - $ref: '#/components/schemas/RegexParserGrader'
+ - $ref: '#/components/schemas/EqualityGrader'
+ - $ref: '#/components/schemas/SubsetOfGrader'
+ - $ref: '#/components/schemas/FactualityGrader'
+ - $ref: '#/components/schemas/FaithfulnessGrader'
+ discriminator:
+ propertyName: type
+ mapping:
+ llm: '#/components/schemas/LlmGrader'
+ regex_parser: '#/components/schemas/RegexParserGrader'
+ equality: '#/components/schemas/EqualityGrader'
+ subset_of: '#/components/schemas/SubsetOfGrader'
+ factuality: '#/components/schemas/FactualityGrader'
+ faithfulness: '#/components/schemas/FaithfulnessGrader'
+ LlmGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: llm
+ default: llm
+ llm:
+ type: object
+ properties:
+ model:
+ type: string
+ prompt:
+ type: string
+ score_regexes:
+ type: array
+ items:
+ type: string
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - model
+ - prompt
+ - score_regexes
+ - aggregation_functions
+ title: LlmGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - llm
+ title: LlmGrader
+ RegexParserGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: regex_parser
+ default: regex_parser
+ regex_parser:
+ type: object
+ properties:
+ parsing_regexes:
+ type: array
+ items:
+ type: string
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - parsing_regexes
+ - aggregation_functions
+ title: RegexParserGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - regex_parser
+ title: RegexParserGrader
+ SubsetOfGrader:
+ type: object
+ properties:
+ type:
+ type: string
+ const: subset_of
+ default: subset_of
+ subset_of:
+ type: object
+ properties:
+ aggregation_functions:
+ type: array
+ items:
+ type: string
+ enum:
+ - average
+ - median
+ - categorical_count
+ - accuracy
+ title: AggregationFunctionType
+ description: A type of aggregation function.
+ additionalProperties: false
+ required:
+ - aggregation_functions
+ title: BasicGraderParams
+ additionalProperties: false
+ required:
+ - type
+ - subset_of
+ title: SubsetOfGrader
Model:
type: object
properties:
@@ -4612,224 +4817,6 @@ components:
- llm
- embedding
title: ModelType
- AgentTurnInputType:
- type: object
- properties:
- type:
- type: string
- const: agent_turn_input
- default: agent_turn_input
- additionalProperties: false
- required:
- - type
- title: AgentTurnInputType
- ArrayType:
- type: object
- properties:
- type:
- type: string
- const: array
- default: array
- additionalProperties: false
- required:
- - type
- title: ArrayType
- BooleanType:
- type: object
- properties:
- type:
- type: string
- const: boolean
- default: boolean
- additionalProperties: false
- required:
- - type
- title: BooleanType
- ChatCompletionInputType:
- type: object
- properties:
- type:
- type: string
- const: chat_completion_input
- default: chat_completion_input
- additionalProperties: false
- required:
- - type
- title: ChatCompletionInputType
- CompletionInputType:
- type: object
- properties:
- type:
- type: string
- const: completion_input
- default: completion_input
- additionalProperties: false
- required:
- - type
- title: CompletionInputType
- JsonType:
- type: object
- properties:
- type:
- type: string
- const: rows
- default: rows
- rows:
- type: array
- items:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- description: >-
- The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
- "content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
- world!"}]} ]
- additionalProperties: false
- required:
- - type
- - rows
- title: RowsDataSource
- description: A dataset stored in rows.
- URIDataSource:
- type: object
- properties:
- type:
- type: string
- const: uri
- default: uri
- uri:
- type: string
- description: >-
- The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
- - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
- additionalProperties: false
- required:
- - type
- - uri
- title: URIDataSource
- description: >-
- A dataset that can be obtained from a URI.
- EqualityGrader:
- type: object
- properties:
- type:
- type: string
- const: equality
- default: equality
- equality:
- type: object
- properties:
- aggregation_functions:
- type: array
- items:
- type: string
- enum:
- - average
- - median
- - categorical_count
- - accuracy
- title: AggregationFunctionType
- description: A type of aggregation function.
- additionalProperties: false
- required:
- - aggregation_functions
- title: BasicGraderParams
- additionalProperties: false
- required:
- - type
- title: ObjectType
- ParamType:
- oneOf:
- - $ref: '#/components/schemas/StringType'
- - $ref: '#/components/schemas/NumberType'
- - $ref: '#/components/schemas/BooleanType'
- - $ref: '#/components/schemas/ArrayType'
- - $ref: '#/components/schemas/ObjectType'
- - $ref: '#/components/schemas/JsonType'
- - $ref: '#/components/schemas/UnionType'
- - $ref: '#/components/schemas/ChatCompletionInputType'
- - $ref: '#/components/schemas/CompletionInputType'
- - $ref: '#/components/schemas/AgentTurnInputType'
- discriminator:
- propertyName: type
- mapping:
- string: '#/components/schemas/StringType'
- number: '#/components/schemas/NumberType'
- boolean: '#/components/schemas/BooleanType'
- array: '#/components/schemas/ArrayType'
- object: '#/components/schemas/ObjectType'
- json: '#/components/schemas/JsonType'
- union: '#/components/schemas/UnionType'
- chat_completion_input: '#/components/schemas/ChatCompletionInputType'
- completion_input: '#/components/schemas/CompletionInputType'
- agent_turn_input: '#/components/schemas/AgentTurnInputType'
- ScoringFn:
- type: object
- properties:
- identifier:
- type: string
- provider_resource_id:
- type: string
- provider_id:
- type: string
- type:
- type: string
- const: scoring_function
- default: scoring_function
- description:
- type: string
- metadata:
- type: object
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- return_type:
- $ref: '#/components/schemas/ParamType'
- params:
- $ref: '#/components/schemas/ScoringFnParams'
- additionalProperties: false
- required:
- - identifier
- - provider_resource_id
- - provider_id
- - type
- - grader
- - metadata
- - return_type
- title: ScoringFn
- StringType:
- type: object
- properties:
- type:
- type: string
- const: string
- default: string
- additionalProperties: false
- required:
- - type
- title: StringType
- UnionType:
- type: object
- properties:
- type:
- type: string
- const: union
- default: union
- additionalProperties: false
- required:
- - type
- title: UnionType
Shield:
type: object
properties:
@@ -6503,6 +6490,37 @@ components:
- purpose
- source
title: RegisterDatasetRequest
+ RegisterGraderRequest:
+ type: object
+ properties:
+ grader:
+ $ref: '#/components/schemas/GraderDefinition'
+ description: >-
+ The grader definition, E.g. - { "type": "llm", "llm": { "model": "llama-405b",
+ "prompt": "You are a judge. Score the answer based on the question. {question}
+ {answer}", } }
+ grader_id:
+ type: string
+ description: >-
+ (Optional) The ID of the grader. If not provided, a random ID will be
+ generated.
+ metadata:
+ type: object
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ description: >-
+ (Optional) Any additional metadata for this grader. - E.g. { "description":
+ "A grader that scores the answer based on the question.", }
+ additionalProperties: false
+ required:
+ - grader
+ title: RegisterGraderRequest
RegisterModelRequest:
type: object
properties:
@@ -6935,10 +6953,9 @@ tags:
- name: Benchmarks
- name: DatasetIO
- name: Datasets
- - name: Eval
- x-displayName: >-
- Llama Stack Evaluation API for running evaluations on model and agent candidates.
+ - name: Evaluation
- name: Files
+ - name: Graders
- name: Inference
description: >-
This API provides the raw interface to the underlying models. Two kinds of models
@@ -6973,8 +6990,9 @@ x-tagGroups:
- Benchmarks
- DatasetIO
- Datasets
- - Eval
+ - Evaluation
- Files
+ - Graders
- Inference
- Inspect
- Models
diff --git a/llama_stack/apis/datasetio/datasetio.py b/llama_stack/apis/datasetio/datasetio.py
index b1eaffa17..d9d86fe1b 100644
--- a/llama_stack/apis/datasetio/datasetio.py
+++ b/llama_stack/apis/datasetio/datasetio.py
@@ -51,6 +51,4 @@ class DatasetIO(Protocol):
...
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
- async def append_rows(
- self, dataset_id: str, rows: List[Dict[str, Any]]
- ) -> None: ...
+ async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: ...
diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 958c7d387..cf4bf7fec 100644
--- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -44,9 +44,7 @@ class PandasDataframeDataset:
elif self.dataset_def.source.type == "rows":
self.df = pandas.DataFrame(self.dataset_def.source.rows)
else:
- raise ValueError(
- f"Unsupported dataset source type: {self.dataset_def.source.type}"
- )
+ raise ValueError(f"Unsupported dataset source type: {self.dataset_def.source.type}")
if self.df is None:
raise ValueError(f"Failed to load dataset from {self.dataset_def.url}")
@@ -119,6 +117,4 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
dataset_impl.load()
new_rows_df = pandas.DataFrame(rows)
- dataset_impl.df = pandas.concat(
- [dataset_impl.df, new_rows_df], ignore_index=True
- )
+ dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True)
diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index db6edbce3..fe3195332 100644
--- a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -98,13 +98,9 @@ class HuggingfaceDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate):
new_dataset = hf_datasets.Dataset.from_list(rows)
# Concatenate the new rows with existing dataset
- updated_dataset = hf_datasets.concatenate_datasets(
- [loaded_dataset, new_dataset]
- )
+ updated_dataset = hf_datasets.concatenate_datasets([loaded_dataset, new_dataset])
if dataset_def.metadata.get("path", None):
updated_dataset.push_to_hub(dataset_def.metadata["path"])
else:
- raise NotImplementedError(
- "Uploading to URL-based datasets is not supported yet"
- )
+ raise NotImplementedError("Uploading to URL-based datasets is not supported yet")