Merge branch 'main' into eval_api_final

This commit is contained in:
Xi Yan 2025-03-17 17:00:30 -07:00
commit 66cd83fb58
37 changed files with 1215 additions and 840 deletions

View file

@ -2233,6 +2233,67 @@
}
},
"/v1/datasetio/iterrows/{dataset_id}": {
"get": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/IterrowsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"DatasetIO"
],
"description": "Get a paginated list of rows from a dataset. Uses cursor-based pagination.",
"parameters": [
{
"name": "dataset_id",
"in": "path",
"description": "The ID of the dataset to get the rows from.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "start_index",
"in": "query",
"description": "Index into dataset for the first row to get. Get all rows if None.",
"required": false,
"schema": {
"type": "integer"
}
},
{
"name": "limit",
"in": "query",
"description": "The number of rows to get.",
"required": false,
"schema": {
"type": "integer"
}
}
]
}
},
"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}": {
"get": {
"responses": {
"200": {
@ -6552,100 +6613,14 @@
"const": "factuality",
"default": "factuality"
},
"factuality": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
}
},
"additionalProperties": false,
"required": [
"aggregation_functions"
],
"title": "BasicGraderParams"
}
},
"additionalProperties": false,
"required": [
"type",
"factuality"
],
"title": "FactualityGrader"
},
"FaithfulnessGrader": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "faithfulness",
"default": "faithfulness"
},
"faithfulness": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
}
},
"additionalProperties": false,
"required": [
"aggregation_functions"
],
"title": "BasicGraderParams"
}
},
"additionalProperties": false,
"required": [
"type",
"faithfulness"
],
"title": "FaithfulnessGrader"
},
"Grader": {
"type": "object",
"properties": {
"identifier": {
"dataset_id": {
"type": "string"
},
"provider_resource_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "grader",
"default": "grader"
},
"grader": {
"$ref": "#/components/schemas/GraderDefinition"
},
"description": {
"type": "string"
"scoring_functions": {
"type": "array",
"items": {
"type": "string"
}
},
"metadata": {
"type": "object",
@ -6679,98 +6654,163 @@
"provider_resource_id",
"provider_id",
"type",
"grader",
"dataset_id",
"scoring_functions",
"metadata"
],
"title": "Grader"
"title": "Benchmark"
},
"GraderDefinition": {
"DataSource": {
"oneOf": [
{
"$ref": "#/components/schemas/LlmGrader"
"$ref": "#/components/schemas/URIDataSource"
},
{
"$ref": "#/components/schemas/RegexParserGrader"
},
{
"$ref": "#/components/schemas/EqualityGrader"
},
{
"$ref": "#/components/schemas/SubsetOfGrader"
},
{
"$ref": "#/components/schemas/FactualityGrader"
},
{
"$ref": "#/components/schemas/FaithfulnessGrader"
"$ref": "#/components/schemas/RowsDataSource"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"llm": "#/components/schemas/LlmGrader",
"regex_parser": "#/components/schemas/RegexParserGrader",
"equality": "#/components/schemas/EqualityGrader",
"subset_of": "#/components/schemas/SubsetOfGrader",
"factuality": "#/components/schemas/FactualityGrader",
"faithfulness": "#/components/schemas/FaithfulnessGrader"
"uri": "#/components/schemas/URIDataSource",
"rows": "#/components/schemas/RowsDataSource"
}
}
},
"LlmGrader": {
"Grader": {
"type": "object",
"properties": {
"identifier": {
"type": "string"
},
"provider_resource_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"type": {
"type": "string",
"const": "grader",
"default": "grader"
},
"purpose": {
"type": "string",
"enum": [
"post-training/messages",
"eval/question-answer",
"eval/messages-answer"
],
"title": "DatasetPurpose",
"description": "Purpose of the dataset. Each purpose has a required input data schema."
},
"source": {
"$ref": "#/components/schemas/DataSource"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"identifier",
"provider_resource_id",
"provider_id",
"type",
"purpose",
"source",
"metadata"
],
"title": "Dataset"
},
"RowsDataSource": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "llm",
"default": "llm"
"const": "rows",
"default": "rows"
},
"llm": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"prompt": {
"type": "string"
},
"score_regexes": {
"type": "array",
"items": {
"type": "string"
}
},
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
"rows": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"additionalProperties": false,
"required": [
"model",
"prompt",
"score_regexes",
"aggregation_functions"
],
"title": "LlmGraderParams"
"description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]"
}
},
"additionalProperties": false,
"required": [
"type",
"llm"
"rows"
],
"title": "LlmGrader"
"title": "RowsDataSource",
"description": "A dataset stored in rows."
},
"URIDataSource": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "uri",
"default": "uri"
},
"uri": {
"type": "string",
"description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\""
}
},
"additionalProperties": false,
"required": [
"type",
"uri"
],
"title": "URIDataSource",
"description": "A dataset that can be obtained from a URI."
},
"RegexParserGrader": {
"type": "object",
@ -6819,45 +6859,182 @@
],
"title": "RegexParserGrader"
},
"SubsetOfGrader": {
"ModelType": {
"type": "string",
"enum": [
"llm",
"embedding"
],
"title": "ModelType"
},
"AgentTurnInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "subset_of",
"default": "subset_of"
},
"subset_of": {
"type": "object",
"properties": {
"aggregation_functions": {
"type": "array",
"items": {
"type": "string",
"enum": [
"average",
"median",
"categorical_count",
"accuracy"
],
"title": "AggregationFunctionType",
"description": "A type of aggregation function."
}
}
},
"additionalProperties": false,
"required": [
"aggregation_functions"
],
"title": "BasicGraderParams"
"const": "agent_turn_input",
"default": "agent_turn_input"
}
},
"additionalProperties": false,
"required": [
"type",
"subset_of"
"type"
],
"title": "SubsetOfGrader"
"title": "AgentTurnInputType"
},
"ArrayType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "array",
"default": "array"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ArrayType"
},
"BooleanType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "boolean",
"default": "boolean"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "BooleanType"
},
"ChatCompletionInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "chat_completion_input",
"default": "chat_completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ChatCompletionInputType"
},
"CompletionInputType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "completion_input",
"default": "completion_input"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "CompletionInputType"
},
"JsonType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "json",
"default": "json"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "JsonType"
},
"NumberType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "number",
"default": "number"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "NumberType"
},
"ObjectType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "object",
"default": "object"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ObjectType"
},
"ParamType": {
"oneOf": [
{
"$ref": "#/components/schemas/StringType"
},
{
"$ref": "#/components/schemas/NumberType"
},
{
"$ref": "#/components/schemas/BooleanType"
},
{
"$ref": "#/components/schemas/ArrayType"
},
{
"$ref": "#/components/schemas/ObjectType"
},
{
"$ref": "#/components/schemas/JsonType"
},
{
"$ref": "#/components/schemas/UnionType"
},
{
"$ref": "#/components/schemas/ChatCompletionInputType"
},
{
"$ref": "#/components/schemas/CompletionInputType"
},
{
"$ref": "#/components/schemas/AgentTurnInputType"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"string": "#/components/schemas/StringType",
"number": "#/components/schemas/NumberType",
"boolean": "#/components/schemas/BooleanType",
"array": "#/components/schemas/ArrayType",
"object": "#/components/schemas/ObjectType",
"json": "#/components/schemas/JsonType",
"union": "#/components/schemas/UnionType",
"chat_completion_input": "#/components/schemas/ChatCompletionInputType",
"completion_input": "#/components/schemas/CompletionInputType",
"agent_turn_input": "#/components/schemas/AgentTurnInputType"
}
}
},
"Model": {
"type": "object",
@ -6913,17 +7090,39 @@
"provider_id",
"type",
"metadata",
"model_type"
"return_type"
],
"title": "Model"
"title": "ScoringFn"
},
"ModelType": {
"type": "string",
"enum": [
"llm",
"embedding"
"StringType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "string",
"default": "string"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "ModelType"
"title": "StringType"
},
"UnionType": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "union",
"default": "union"
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "UnionType"
},
"Shield": {
"type": "object",
@ -8131,7 +8330,7 @@
},
"description": "The rows in the current page."
},
"next_index": {
"next_start_index": {
"type": "integer",
"description": "Index into dataset for the first row in the next page. None if there are no more rows."
}
@ -9440,7 +9639,7 @@
},
"source": {
"$ref": "#/components/schemas/DataSource",
"description": "The data source of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
"description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }"
},
"metadata": {
"type": "object",
@ -9478,50 +9677,6 @@
"purpose",
"source"
],
"title": "RegisterDatasetRequest"
},
"RegisterGraderRequest": {
"type": "object",
"properties": {
"grader": {
"$ref": "#/components/schemas/GraderDefinition",
"description": "The grader definition, E.g. - { \"type\": \"llm\", \"llm\": { \"model\": \"llama-405b\", \"prompt\": \"You are a judge. Score the answer based on the question. {question} {answer}\", } }"
},
"grader_id": {
"type": "string",
"description": "(Optional) The ID of the grader. If not provided, a random ID will be generated."
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) Any additional metadata for this grader. - E.g. { \"description\": \"A grader that scores the answer based on the question.\", }"
}
},
"additionalProperties": false,
"required": [
"grader"
],
"title": "RegisterGraderRequest"
},
"RegisterModelRequest": {
@ -10199,9 +10354,6 @@
{
"name": "Files"
},
{
"name": "Graders"
},
{
"name": "Inference",
"description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
@ -10254,9 +10406,8 @@
"Benchmarks",
"DatasetIO",
"Datasets",
"Evaluation",
"Eval",
"Files",
"Graders",
"Inference",
"Inspect",
"Models",

View file

@ -1507,6 +1507,50 @@ paths:
$ref: '#/components/schemas/InvokeToolRequest'
required: true
/v1/datasetio/iterrows/{dataset_id}:
get:
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/IterrowsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- DatasetIO
description: >-
Get a paginated list of rows from a dataset. Uses cursor-based pagination.
parameters:
- name: dataset_id
in: path
description: >-
The ID of the dataset to get the rows from.
required: true
schema:
type: string
- name: start_index
in: query
description: >-
Index into dataset for the first row to get. Get all rows if None.
required: false
schema:
type: integer
- name: limit
in: query
description: The number of rows to get.
required: false
schema:
type: integer
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
get:
responses:
'200':
@ -4527,255 +4571,6 @@ components:
title: URIDataSource
description: >-
A dataset that can be obtained from a URI.
EqualityGrader:
type: object
properties:
type:
type: string
const: equality
default: equality
equality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- aggregation_functions
title: BasicGraderParams
additionalProperties: false
required:
- type
- equality
title: EqualityGrader
FactualityGrader:
type: object
properties:
type:
type: string
const: factuality
default: factuality
factuality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- aggregation_functions
title: BasicGraderParams
additionalProperties: false
required:
- type
- factuality
title: FactualityGrader
FaithfulnessGrader:
type: object
properties:
type:
type: string
const: faithfulness
default: faithfulness
faithfulness:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- aggregation_functions
title: BasicGraderParams
additionalProperties: false
required:
- type
- faithfulness
title: FaithfulnessGrader
Grader:
type: object
properties:
identifier:
type: string
provider_resource_id:
type: string
provider_id:
type: string
type:
type: string
const: grader
default: grader
grader:
$ref: '#/components/schemas/GraderDefinition'
description:
type: string
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
additionalProperties: false
required:
- identifier
- provider_resource_id
- provider_id
- type
- grader
- metadata
title: Grader
GraderDefinition:
oneOf:
- $ref: '#/components/schemas/LlmGrader'
- $ref: '#/components/schemas/RegexParserGrader'
- $ref: '#/components/schemas/EqualityGrader'
- $ref: '#/components/schemas/SubsetOfGrader'
- $ref: '#/components/schemas/FactualityGrader'
- $ref: '#/components/schemas/FaithfulnessGrader'
discriminator:
propertyName: type
mapping:
llm: '#/components/schemas/LlmGrader'
regex_parser: '#/components/schemas/RegexParserGrader'
equality: '#/components/schemas/EqualityGrader'
subset_of: '#/components/schemas/SubsetOfGrader'
factuality: '#/components/schemas/FactualityGrader'
faithfulness: '#/components/schemas/FaithfulnessGrader'
LlmGrader:
type: object
properties:
type:
type: string
const: llm
default: llm
llm:
type: object
properties:
model:
type: string
prompt:
type: string
score_regexes:
type: array
items:
type: string
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- model
- prompt
- score_regexes
- aggregation_functions
title: LlmGraderParams
additionalProperties: false
required:
- type
- llm
title: LlmGrader
RegexParserGrader:
type: object
properties:
type:
type: string
const: regex_parser
default: regex_parser
regex_parser:
type: object
properties:
parsing_regexes:
type: array
items:
type: string
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- parsing_regexes
- aggregation_functions
title: RegexParserGraderParams
additionalProperties: false
required:
- type
- regex_parser
title: RegexParserGrader
SubsetOfGrader:
type: object
properties:
type:
type: string
const: subset_of
default: subset_of
subset_of:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- aggregation_functions
title: BasicGraderParams
additionalProperties: false
required:
- type
- subset_of
title: SubsetOfGrader
Model:
type: object
properties:
@ -4817,6 +4612,224 @@ components:
- llm
- embedding
title: ModelType
AgentTurnInputType:
type: object
properties:
type:
type: string
const: agent_turn_input
default: agent_turn_input
additionalProperties: false
required:
- type
title: AgentTurnInputType
ArrayType:
type: object
properties:
type:
type: string
const: array
default: array
additionalProperties: false
required:
- type
title: ArrayType
BooleanType:
type: object
properties:
type:
type: string
const: boolean
default: boolean
additionalProperties: false
required:
- type
title: BooleanType
ChatCompletionInputType:
type: object
properties:
type:
type: string
const: chat_completion_input
default: chat_completion_input
additionalProperties: false
required:
- type
title: ChatCompletionInputType
CompletionInputType:
type: object
properties:
type:
type: string
const: completion_input
default: completion_input
additionalProperties: false
required:
- type
title: CompletionInputType
JsonType:
type: object
properties:
type:
type: string
const: rows
default: rows
rows:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user",
"content": "Hello, world!"}, {"role": "assistant", "content": "Hello,
world!"}]} ]
additionalProperties: false
required:
- type
- rows
title: RowsDataSource
description: A dataset stored in rows.
URIDataSource:
type: object
properties:
type:
type: string
const: uri
default: uri
uri:
type: string
description: >-
The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl"
- "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}"
additionalProperties: false
required:
- type
- uri
title: URIDataSource
description: >-
A dataset that can be obtained from a URI.
EqualityGrader:
type: object
properties:
type:
type: string
const: equality
default: equality
equality:
type: object
properties:
aggregation_functions:
type: array
items:
type: string
enum:
- average
- median
- categorical_count
- accuracy
title: AggregationFunctionType
description: A type of aggregation function.
additionalProperties: false
required:
- aggregation_functions
title: BasicGraderParams
additionalProperties: false
required:
- type
title: ObjectType
ParamType:
oneOf:
- $ref: '#/components/schemas/StringType'
- $ref: '#/components/schemas/NumberType'
- $ref: '#/components/schemas/BooleanType'
- $ref: '#/components/schemas/ArrayType'
- $ref: '#/components/schemas/ObjectType'
- $ref: '#/components/schemas/JsonType'
- $ref: '#/components/schemas/UnionType'
- $ref: '#/components/schemas/ChatCompletionInputType'
- $ref: '#/components/schemas/CompletionInputType'
- $ref: '#/components/schemas/AgentTurnInputType'
discriminator:
propertyName: type
mapping:
string: '#/components/schemas/StringType'
number: '#/components/schemas/NumberType'
boolean: '#/components/schemas/BooleanType'
array: '#/components/schemas/ArrayType'
object: '#/components/schemas/ObjectType'
json: '#/components/schemas/JsonType'
union: '#/components/schemas/UnionType'
chat_completion_input: '#/components/schemas/ChatCompletionInputType'
completion_input: '#/components/schemas/CompletionInputType'
agent_turn_input: '#/components/schemas/AgentTurnInputType'
ScoringFn:
type: object
properties:
identifier:
type: string
provider_resource_id:
type: string
provider_id:
type: string
type:
type: string
const: scoring_function
default: scoring_function
description:
type: string
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
return_type:
$ref: '#/components/schemas/ParamType'
params:
$ref: '#/components/schemas/ScoringFnParams'
additionalProperties: false
required:
- identifier
- provider_resource_id
- provider_id
- type
- grader
- metadata
- return_type
title: ScoringFn
StringType:
type: object
properties:
type:
type: string
const: string
default: string
additionalProperties: false
required:
- type
title: StringType
UnionType:
type: object
properties:
type:
type: string
const: union
default: union
additionalProperties: false
required:
- type
title: UnionType
Shield:
type: object
properties:
@ -5580,7 +5593,7 @@ components:
- type: array
- type: object
description: The rows in the current page.
next_index:
next_start_index:
type: integer
description: >-
Index into dataset for the first row in the next page. None if there are
@ -6461,12 +6474,14 @@ components:
source:
$ref: '#/components/schemas/DataSource'
description: >-
The data source of the dataset. Examples: - { "type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"
} - { "type": "uri", "uri": "lsfs://mydata.jsonl" } - { "type": "uri",
"uri": "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
"huggingface://llamastack/simpleqa?split=train" } - { "type": "rows",
"rows": [ { "messages": [ {"role": "user", "content": "Hello, world!"},
{"role": "assistant", "content": "Hello, world!"}, ] } ] }
The data source of the dataset. Ensure that the data source schema is
compatible with the purpose of the dataset. Examples: - { "type": "uri",
"uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
"lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
} - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
} - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
"Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
} ] }
metadata:
type: object
additionalProperties:
@ -6488,37 +6503,6 @@ components:
- purpose
- source
title: RegisterDatasetRequest
RegisterGraderRequest:
type: object
properties:
grader:
$ref: '#/components/schemas/GraderDefinition'
description: >-
The grader definition, E.g. - { "type": "llm", "llm": { "model": "llama-405b",
"prompt": "You are a judge. Score the answer based on the question. {question}
{answer}", } }
grader_id:
type: string
description: >-
(Optional) The ID of the grader. If not provided, a random ID will be
generated.
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: >-
(Optional) Any additional metadata for this grader. - E.g. { "description":
"A grader that scores the answer based on the question.", }
additionalProperties: false
required:
- grader
title: RegisterGraderRequest
RegisterModelRequest:
type: object
properties:
@ -6951,9 +6935,10 @@ tags:
- name: Benchmarks
- name: DatasetIO
- name: Datasets
- name: Evaluation
- name: Eval
x-displayName: >-
Llama Stack Evaluation API for running evaluations on model and agent candidates.
- name: Files
- name: Graders
- name: Inference
description: >-
This API provides the raw interface to the underlying models. Two kinds of models
@ -6988,9 +6973,8 @@ x-tagGroups:
- Benchmarks
- DatasetIO
- Datasets
- Evaluation
- Eval
- Files
- Graders
- Inference
- Inspect
- Models

View file

@ -6,7 +6,7 @@ This guide will walk you through the process of adding a new API provider to Lla
- Begin by reviewing the [core concepts](../concepts/index.md) of Llama Stack and choose the API your provider belongs to (Inference, Safety, VectorIO, etc.)
- Determine the provider type ({repopath}`Remote::llama_stack/providers/remote` or {repopath}`Inline::llama_stack/providers/inline`). Remote providers make requests to external services, while inline providers execute implementation locally.
- Add your provider to the appropriate {repopath}`Registry::llama_stack/providers/registry/`. Specify pip dependencies necessary.
- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`llama_stack/scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
- Update any distribution {repopath}`Templates::llama_stack/templates/` build.yaml and run.yaml files if they should include your provider by default. Run {repopath}`./scripts/distro_codegen.py` if necessary. Note that `distro_codegen.py` will fail if the new provider causes any distribution template to attempt to import provider-specific dependencies. This usually means the distribution's `get_distribution_template()` code path should only import any necessary Config or model alias definitions from each provider and not the provider's actual implementation.
Here are some example PRs to help you get started:

View file

@ -6,13 +6,13 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
| API | Provider(s) |
|-----|-------------|
| agents | `inline::meta-reference` |
| datasetio | `remote::huggingface`, `inline::localfs` |
| datasetio | `inline::localfs` |
| eval | `inline::meta-reference` |
| inference | `remote::nvidia` |
| safety | `inline::llama-guard` |
| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` |
| safety | `remote::nvidia` |
| scoring | `inline::basic` |
| telemetry | `inline::meta-reference` |
| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::code-interpreter`, `inline::rag-runtime`, `remote::model-context-protocol` |
| tool_runtime | `inline::rag-runtime` |
| vector_io | `inline::faiss` |
@ -20,8 +20,10 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
The following environment variables can be configured:
- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`)
- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`)
- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`)
### Models

View file

@ -6,17 +6,32 @@ The `llama-stack-client` CLI allows you to query information about the distribut
### `llama-stack-client`
```bash
llama-stack-client -h
llama-stack-client
Usage: llama-stack-client [OPTIONS] COMMAND [ARGS]...
usage: llama-stack-client [-h] {models,memory_banks,shields} ...
Welcome to the LlamaStackClient CLI
Welcome to the LlamaStackClient CLI
Options:
--version Show the version and exit.
--endpoint TEXT Llama Stack distribution endpoint
--api-key TEXT Llama Stack distribution API key
--config TEXT Path to config file
--help Show this message and exit.
options:
-h, --help show this help message and exit
subcommands:
{models,memory_banks,shields}
Commands:
configure Configure Llama Stack Client CLI.
datasets Manage datasets.
eval Run evaluation tasks.
eval_tasks Manage evaluation tasks.
inference Inference (chat).
inspect Inspect server configuration.
models Manage GenAI models.
post_training Post-training.
providers Manage API providers.
scoring_functions Manage scoring functions.
shields Manage safety shield services.
toolgroups Manage available tool groups.
vector_dbs Manage vector databases.
```
### `llama-stack-client configure`
@ -127,11 +142,11 @@ llama-stack-client vector_dbs list
llama-stack-client vector_dbs register <vector-db-id> [--provider-id <provider-id>] [--provider-vector-db-id <provider-vector-db-id>] [--embedding-model <embedding-model>] [--embedding-dimension <embedding-dimension>]
```
Options:
- `--provider-id`: Optional. Provider ID for the vector db
- `--provider-vector-db-id`: Optional. Provider's vector db ID
- `--embedding-model`: Optional. Embedding model to use. Default: "all-MiniLM-L6-v2"
- `--embedding-dimension`: Optional. Dimension of embeddings. Default: 384
Optional arguments:
- `--provider-id`: Provider ID for the vector db
- `--provider-vector-db-id`: Provider's vector db ID
- `--embedding-model`: Embedding model to use. Default: "all-MiniLM-L6-v2"
- `--embedding-dimension`: Dimension of embeddings. Default: 384
### `llama-stack-client vector_dbs unregister`
```bash
@ -157,11 +172,13 @@ llama-stack-client shields list
llama-stack-client shields register --shield-id <shield-id> [--provider-id <provider-id>] [--provider-shield-id <provider-shield-id>] [--params <params>]
```
Options:
- `--shield-id`: Required. ID of the shield
- `--provider-id`: Optional. Provider ID for the shield
- `--provider-shield-id`: Optional. Provider's shield ID
- `--params`: Optional. JSON configuration parameters for the shield
Required arguments:
- `--shield-id`: ID of the shield
Optional arguments:
- `--provider-id`: Provider ID for the shield
- `--provider-shield-id`: Provider's shield ID
- `--params`: JSON configuration parameters for the shield
## Eval Task Management
@ -175,13 +192,15 @@ llama-stack-client benchmarks list
llama-stack-client benchmarks register --eval-task-id <eval-task-id> --dataset-id <dataset-id> --scoring-functions <function1> [<function2> ...] [--provider-id <provider-id>] [--provider-eval-task-id <provider-eval-task-id>] [--metadata <metadata>]
```
Options:
- `--eval-task-id`: Required. ID of the eval task
- `--dataset-id`: Required. ID of the dataset to evaluate
- `--scoring-functions`: Required. One or more scoring functions to use for evaluation
- `--provider-id`: Optional. Provider ID for the eval task
- `--provider-eval-task-id`: Optional. Provider's eval task ID
- `--metadata`: Optional. Metadata for the eval task in JSON format
Required arguments:
- `--eval-task-id`: ID of the eval task
- `--dataset-id`: ID of the dataset to evaluate
- `--scoring-functions`: One or more scoring functions to use for evaluation
Optional arguments:
- `--provider-id`: Provider ID for the eval task
- `--provider-eval-task-id`: Provider's eval task ID
- `--metadata`: Metadata for the eval task in JSON format
## Eval execution
### `llama-stack-client eval run-benchmark`
@ -189,11 +208,13 @@ Options:
llama-stack-client eval run-benchmark <eval-task-id1> [<eval-task-id2> ...] --eval-task-config <config-file> --output-dir <output-dir> [--num-examples <num>] [--visualize]
```
Options:
- `--eval-task-config`: Required. Path to the eval task config file in JSON format
- `--output-dir`: Required. Path to the directory where evaluation results will be saved
- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging)
- `--visualize`: Optional flag. If set, visualizes evaluation results after completion
Required arguments:
- `--eval-task-config`: Path to the eval task config file in JSON format
- `--output-dir`: Path to the directory where evaluation results will be saved
Optional arguments:
- `--num-examples`: Number of examples to evaluate (useful for debugging)
- `--visualize`: If set, visualizes evaluation results after completion
Example benchmark_config.json:
```json
@ -214,11 +235,13 @@ Example benchmark_config.json:
llama-stack-client eval run-scoring <eval-task-id> --eval-task-config <config-file> --output-dir <output-dir> [--num-examples <num>] [--visualize]
```
Options:
- `--eval-task-config`: Required. Path to the eval task config file in JSON format
- `--output-dir`: Required. Path to the directory where scoring results will be saved
- `--num-examples`: Optional. Number of examples to evaluate (useful for debugging)
- `--visualize`: Optional flag. If set, visualizes scoring results after completion
Required arguments:
- `--eval-task-config`: Path to the eval task config file in JSON format
- `--output-dir`: Path to the directory where scoring results will be saved
Optional arguments:
- `--num-examples`: Number of examples to evaluate (useful for debugging)
- `--visualize`: If set, visualizes scoring results after completion
## Tool Group Management
@ -230,11 +253,11 @@ llama-stack-client toolgroups list
+---------------------------+------------------+------+---------------+
| identifier | provider_id | args | mcp_endpoint |
+===========================+==================+======+===============+
| builtin::code_interpreter | code-interpreter | None | None |
| builtin::code_interpreter | code-interpreter | None | None |
+---------------------------+------------------+------+---------------+
| builtin::rag | rag-runtime | None | None |
| builtin::rag | rag-runtime | None | None |
+---------------------------+------------------+------+---------------+
| builtin::websearch | tavily-search | None | None |
| builtin::websearch | tavily-search | None | None |
+---------------------------+------------------+------+---------------+
```
@ -250,11 +273,11 @@ Shows detailed information about a specific toolgroup. If the toolgroup is not f
llama-stack-client toolgroups register <toolgroup_id> [--provider-id <provider-id>] [--provider-toolgroup-id <provider-toolgroup-id>] [--mcp-config <mcp-config>] [--args <args>]
```
Options:
- `--provider-id`: Optional. Provider ID for the toolgroup
- `--provider-toolgroup-id`: Optional. Provider's toolgroup ID
- `--mcp-config`: Optional. JSON configuration for the MCP endpoint
- `--args`: Optional. JSON arguments for the toolgroup
Optional arguments:
- `--provider-id`: Provider ID for the toolgroup
- `--provider-toolgroup-id`: Provider's toolgroup ID
- `--mcp-config`: JSON configuration for the MCP endpoint
- `--args`: JSON arguments for the toolgroup
### `llama-stack-client toolgroups unregister`
```bash