diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html
index 363d968f9..b1b1504ee 100644
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-07 22:26:27.169134"
},
"servers": [
{
@@ -469,7 +469,7 @@
}
}
},
- "/eval/evaluate": {
+ "/eval/evaluate_rows": {
"post": {
"responses": {
"200": {
@@ -501,47 +501,7 @@
"content": {
"application/json": {
"schema": {
- "$ref": "#/components/schemas/EvaluateRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/eval/evaluate_batch": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/Job"
- }
- }
- }
- }
- },
- "tags": [
- "Eval"
- ],
- "parameters": [
- {
- "name": "X-LlamaStack-ProviderData",
- "in": "header",
- "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
- "required": false,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateBatchRequest"
+ "$ref": "#/components/schemas/EvaluateRowsRequest"
}
}
},
@@ -1002,7 +962,7 @@
],
"parameters": [
{
- "name": "shield_type",
+ "name": "identifier",
"in": "query",
"required": true,
"schema": {
@@ -1317,6 +1277,14 @@
"Eval"
],
"parameters": [
+ {
+ "name": "task_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
{
"name": "job_id",
"in": "query",
@@ -1362,6 +1330,14 @@
"Eval"
],
"parameters": [
+ {
+ "name": "task_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
{
"name": "job_id",
"in": "query",
@@ -1892,6 +1868,46 @@
}
}
},
+ "/eval/run_eval": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Job"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Eval"
+ ],
+ "parameters": [
+ {
+ "name": "X-LlamaStack-ProviderData",
+ "in": "header",
+ "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+ "required": false,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/RunEvalRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/safety/run_shield": {
"post": {
"responses": {
@@ -4490,6 +4506,103 @@
"config"
]
},
+ "AppEvalTaskConfig": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "app",
+ "default": "app"
+ },
+ "eval_candidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ]
+ },
+ "scoring_params": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+ },
+ {
+ "$ref": "#/components/schemas/RegexParserScoringFnParams"
+ }
+ ]
+ }
+ },
+ "num_examples": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "eval_candidate",
+ "scoring_params"
+ ]
+ },
+ "BenchmarkEvalTaskConfig": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "benchmark",
+ "default": "benchmark"
+ },
+ "eval_candidate": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/ModelCandidate"
+ },
+ {
+ "$ref": "#/components/schemas/AgentCandidate"
+ }
+ ]
+ },
+ "num_examples": {
+ "type": "integer"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "eval_candidate"
+ ]
+ },
+ "LLMAsJudgeScoringFnParams": {
+ "type": "object",
+ "properties": {
+ "type": {
+ "type": "string",
+ "const": "llm_as_judge",
+ "default": "llm_as_judge"
+ },
+ "judge_model": {
+ "type": "string"
+ },
+ "prompt_template": {
+ "type": "string"
+ },
+ "judge_score_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type",
+ "judge_model"
+ ]
+ },
"ModelCandidate": {
"type": "object",
"properties": {
@@ -4515,9 +4628,32 @@
"sampling_params"
]
},
- "EvaluateRequest": {
+ "RegexParserScoringFnParams": {
"type": "object",
"properties": {
+ "type": {
+ "type": "string",
+ "const": "regex_parser",
+ "default": "regex_parser"
+ },
+ "parsing_regexes": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "type"
+ ]
+ },
+ "EvaluateRowsRequest": {
+ "type": "object",
+ "properties": {
+ "task_id": {
+ "type": "string"
+ },
"input_rows": {
"type": "array",
"items": {
@@ -4546,28 +4682,29 @@
}
}
},
- "candidate": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/ModelCandidate"
- },
- {
- "$ref": "#/components/schemas/AgentCandidate"
- }
- ]
- },
"scoring_functions": {
"type": "array",
"items": {
"type": "string"
}
+ },
+ "task_config": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/BenchmarkEvalTaskConfig"
+ },
+ {
+ "$ref": "#/components/schemas/AppEvalTaskConfig"
+ }
+ ]
}
},
"additionalProperties": false,
"required": [
+ "task_id",
"input_rows",
- "candidate",
- "scoring_functions"
+ "scoring_functions",
+ "task_config"
]
},
"EvaluateResponse": {
@@ -4677,48 +4814,6 @@
"aggregated_results"
]
},
- "EvaluateBatchRequest": {
- "type": "object",
- "properties": {
- "dataset_id": {
- "type": "string"
- },
- "candidate": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/ModelCandidate"
- },
- {
- "$ref": "#/components/schemas/AgentCandidate"
- }
- ]
- },
- "scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "dataset_id",
- "candidate",
- "scoring_functions"
- ]
- },
- "Job": {
- "type": "object",
- "properties": {
- "job_id": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_id"
- ]
- },
"GetAgentsSessionRequest": {
"type": "object",
"properties": {
@@ -5085,6 +5180,11 @@
]
}
},
+ "type": {
+ "type": "string",
+ "const": "dataset",
+ "default": "dataset"
+ },
"provider_id": {
"type": "string"
}
@@ -5095,6 +5195,7 @@
"dataset_schema",
"url",
"metadata",
+ "type",
"provider_id"
]
},
@@ -5132,6 +5233,11 @@
]
}
},
+ "type": {
+ "type": "string",
+ "const": "model",
+ "default": "model"
+ },
"provider_id": {
"type": "string"
}
@@ -5141,6 +5247,7 @@
"identifier",
"llama_model",
"metadata",
+ "type",
"provider_id"
]
},
@@ -5188,166 +5295,6 @@
"total_count"
]
},
- "Parameter": {
- "type": "object",
- "properties": {
- "name": {
- "type": "string"
- },
- "type": {
- "oneOf": [
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "string",
- "default": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "number",
- "default": "number"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "boolean",
- "default": "boolean"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "array",
- "default": "array"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "object",
- "default": "object"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "json",
- "default": "json"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "union",
- "default": "union"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "chat_completion_input",
- "default": "chat_completion_input"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "completion_input",
- "default": "completion_input"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- },
- {
- "type": "object",
- "properties": {
- "type": {
- "type": "string",
- "const": "agent_turn_input",
- "default": "agent_turn_input"
- }
- },
- "additionalProperties": false,
- "required": [
- "type"
- ]
- }
- ]
- },
- "description": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "name",
- "type"
- ]
- },
"ScoringFnDefWithProvider": {
"type": "object",
"properties": {
@@ -5382,12 +5329,6 @@
]
}
},
- "parameters": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/Parameter"
- }
- },
"return_type": {
"oneOf": [
{
@@ -5532,27 +5473,21 @@
}
]
},
- "context": {
- "type": "object",
- "properties": {
- "judge_model": {
- "type": "string"
+ "params": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
},
- "prompt_template": {
- "type": "string"
- },
- "judge_score_regex": {
- "type": "array",
- "items": {
- "type": "string"
- }
+ {
+ "$ref": "#/components/schemas/RegexParserScoringFnParams"
}
- },
- "additionalProperties": false,
- "required": [
- "judge_model"
]
},
+ "type": {
+ "type": "string",
+ "const": "scoring_fn",
+ "default": "scoring_fn"
+ },
"provider_id": {
"type": "string"
}
@@ -5561,8 +5496,8 @@
"required": [
"identifier",
"metadata",
- "parameters",
"return_type",
+ "type",
"provider_id"
]
},
@@ -5572,7 +5507,7 @@
"identifier": {
"type": "string"
},
- "type": {
+ "shield_type": {
"type": "string"
},
"params": {
@@ -5600,6 +5535,11 @@
]
}
},
+ "type": {
+ "type": "string",
+ "const": "shield",
+ "default": "shield"
+ },
"provider_id": {
"type": "string"
}
@@ -5607,8 +5547,9 @@
"additionalProperties": false,
"required": [
"identifier",
- "type",
+ "shield_type",
"params",
+ "type",
"provider_id"
]
},
@@ -5867,12 +5808,16 @@
"JobCancelRequest": {
"type": "object",
"properties": {
+ "task_id": {
+ "type": "string"
+ },
"job_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
+ "task_id",
"job_id"
]
},
@@ -6575,10 +6520,45 @@
"shield"
]
},
+ "RunEvalRequest": {
+ "type": "object",
+ "properties": {
+ "task_id": {
+ "type": "string"
+ },
+ "task_config": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/BenchmarkEvalTaskConfig"
+ },
+ {
+ "$ref": "#/components/schemas/AppEvalTaskConfig"
+ }
+ ]
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "task_id",
+ "task_config"
+ ]
+ },
+ "Job": {
+ "type": "object",
+ "properties": {
+ "job_id": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_id"
+ ]
+ },
"RunShieldRequest": {
"type": "object",
"properties": {
- "shield_type": {
+ "identifier": {
"type": "string"
},
"messages": {
@@ -6628,7 +6608,7 @@
},
"additionalProperties": false,
"required": [
- "shield_type",
+ "identifier",
"messages",
"params"
]
@@ -6674,9 +6654,23 @@
}
},
"scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+ },
+ {
+ "$ref": "#/components/schemas/RegexParserScoringFnParams"
+ }
+ ]
+ },
+ {
+ "type": "null"
+ }
+ ]
}
}
},
@@ -6708,9 +6702,23 @@
"type": "string"
},
"scoring_functions": {
- "type": "array",
- "items": {
- "type": "string"
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
+ },
+ {
+ "$ref": "#/components/schemas/RegexParserScoringFnParams"
+ }
+ ]
+ },
+ {
+ "type": "null"
+ }
+ ]
}
},
"save_results_dataset": {
@@ -7063,13 +7071,19 @@
],
"tags": [
{
- "name": "Memory"
+ "name": "ScoringFunctions"
},
{
- "name": "Inference"
+ "name": "Agents"
},
{
- "name": "Eval"
+ "name": "Shields"
+ },
+ {
+ "name": "Telemetry"
+ },
+ {
+ "name": "Safety"
},
{
"name": "MemoryBanks"
@@ -7084,16 +7098,10 @@
"name": "PostTraining"
},
{
- "name": "Agents"
+ "name": "Inference"
},
{
- "name": "Shields"
- },
- {
- "name": "Telemetry"
- },
- {
- "name": "Inspect"
+ "name": "Datasets"
},
{
"name": "DatasetIO"
@@ -7102,17 +7110,17 @@
"name": "SyntheticDataGeneration"
},
{
- "name": "Datasets"
+ "name": "Memory"
+ },
+ {
+ "name": "Eval"
+ },
+ {
+ "name": "Inspect"
},
{
"name": "Scoring"
},
- {
- "name": "ScoringFunctions"
- },
- {
- "name": "Safety"
- },
{
"name": "BuiltinTool",
"description": ""
@@ -7377,13 +7385,29 @@
"name": "AgentCandidate",
"description": ""
},
+ {
+ "name": "AppEvalTaskConfig",
+ "description": ""
+ },
+ {
+ "name": "BenchmarkEvalTaskConfig",
+ "description": ""
+ },
+ {
+ "name": "LLMAsJudgeScoringFnParams",
+ "description": ""
+ },
{
"name": "ModelCandidate",
"description": ""
},
{
- "name": "EvaluateRequest",
- "description": ""
+ "name": "RegexParserScoringFnParams",
+ "description": ""
+ },
+ {
+ "name": "EvaluateRowsRequest",
+ "description": ""
},
{
"name": "EvaluateResponse",
@@ -7393,14 +7417,6 @@
"name": "ScoringResult",
"description": ""
},
- {
- "name": "EvaluateBatchRequest",
- "description": ""
- },
- {
- "name": "Job",
- "description": ""
- },
{
"name": "GetAgentsSessionRequest",
"description": ""
@@ -7441,10 +7457,6 @@
"name": "PaginatedRowsResult",
"description": ""
},
- {
- "name": "Parameter",
- "description": ""
- },
{
"name": "ScoringFnDefWithProvider",
"description": ""
@@ -7589,6 +7601,14 @@
"name": "RegisterShieldRequest",
"description": ""
},
+ {
+ "name": "RunEvalRequest",
+ "description": ""
+ },
+ {
+ "name": "Job",
+ "description": ""
+ },
{
"name": "RunShieldRequest",
"description": ""
@@ -7680,11 +7700,13 @@
"AgentTurnResponseStreamChunk",
"AgentTurnResponseTurnCompletePayload",
"AgentTurnResponseTurnStartPayload",
+ "AppEvalTaskConfig",
"Attachment",
"BatchChatCompletionRequest",
"BatchChatCompletionResponse",
"BatchCompletionRequest",
"BatchCompletionResponse",
+ "BenchmarkEvalTaskConfig",
"BuiltinTool",
"CancelTrainingJobRequest",
"ChatCompletionRequest",
@@ -7708,9 +7730,8 @@
"DoraFinetuningConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
- "EvaluateBatchRequest",
- "EvaluateRequest",
"EvaluateResponse",
+ "EvaluateRowsRequest",
"FinetuningAlgorithm",
"FunctionCallToolDefinition",
"GetAgentsSessionRequest",
@@ -7724,6 +7745,7 @@
"JobStatus",
"KeyValueMemoryBankDef",
"KeywordMemoryBankDef",
+ "LLMAsJudgeScoringFnParams",
"LogEventRequest",
"LogSeverity",
"LoraFinetuningConfig",
@@ -7735,7 +7757,6 @@
"ModelDefWithProvider",
"OptimizerConfig",
"PaginatedRowsResult",
- "Parameter",
"PhotogenToolDefinition",
"PostTrainingJob",
"PostTrainingJobArtifactsResponse",
@@ -7748,6 +7769,7 @@
"QueryDocumentsRequest",
"QueryDocumentsResponse",
"RLHFAlgorithm",
+ "RegexParserScoringFnParams",
"RegisterDatasetRequest",
"RegisterMemoryBankRequest",
"RegisterModelRequest",
@@ -7756,6 +7778,7 @@
"RestAPIExecutionConfig",
"RestAPIMethod",
"RouteInfo",
+ "RunEvalRequest",
"RunShieldRequest",
"RunShieldResponse",
"SafetyViolation",
diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml
index 7dd231965..f839e7bc0 100644
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -218,6 +218,30 @@ components:
- event_type
- turn_id
type: object
+ AppEvalTaskConfig:
+ additionalProperties: false
+ properties:
+ eval_candidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ num_examples:
+ type: integer
+ scoring_params:
+ additionalProperties:
+ oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
+ type: object
+ type:
+ const: app
+ default: app
+ type: string
+ required:
+ - type
+ - eval_candidate
+ - scoring_params
+ type: object
Attachment:
additionalProperties: false
properties:
@@ -322,6 +346,23 @@ components:
required:
- completion_message_batch
type: object
+ BenchmarkEvalTaskConfig:
+ additionalProperties: false
+ properties:
+ eval_candidate:
+ oneOf:
+ - $ref: '#/components/schemas/ModelCandidate'
+ - $ref: '#/components/schemas/AgentCandidate'
+ num_examples:
+ type: integer
+ type:
+ const: benchmark
+ default: benchmark
+ type: string
+ required:
+ - type
+ - eval_candidate
+ type: object
BuiltinTool:
enum:
- brave_search
@@ -790,6 +831,10 @@ components:
type: object
provider_id:
type: string
+ type:
+ const: dataset
+ default: dataset
+ type: string
url:
$ref: '#/components/schemas/URL'
required:
@@ -797,6 +842,7 @@ components:
- dataset_schema
- url
- metadata
+ - type
- provider_id
type: object
DeleteAgentsRequest:
@@ -872,52 +918,6 @@ components:
required:
- embeddings
type: object
- EvaluateBatchRequest:
- additionalProperties: false
- properties:
- candidate:
- oneOf:
- - $ref: '#/components/schemas/ModelCandidate'
- - $ref: '#/components/schemas/AgentCandidate'
- dataset_id:
- type: string
- scoring_functions:
- items:
- type: string
- type: array
- required:
- - dataset_id
- - candidate
- - scoring_functions
- type: object
- EvaluateRequest:
- additionalProperties: false
- properties:
- candidate:
- oneOf:
- - $ref: '#/components/schemas/ModelCandidate'
- - $ref: '#/components/schemas/AgentCandidate'
- input_rows:
- items:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- type: array
- scoring_functions:
- items:
- type: string
- type: array
- required:
- - input_rows
- - candidate
- - scoring_functions
- type: object
EvaluateResponse:
additionalProperties: false
properties:
@@ -941,6 +941,37 @@ components:
- generations
- scores
type: object
+ EvaluateRowsRequest:
+ additionalProperties: false
+ properties:
+ input_rows:
+ items:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ type: array
+ scoring_functions:
+ items:
+ type: string
+ type: array
+ task_config:
+ oneOf:
+ - $ref: '#/components/schemas/BenchmarkEvalTaskConfig'
+ - $ref: '#/components/schemas/AppEvalTaskConfig'
+ task_id:
+ type: string
+ required:
+ - task_id
+ - input_rows
+ - scoring_functions
+ - task_config
+ type: object
FinetuningAlgorithm:
enum:
- full
@@ -1082,7 +1113,10 @@ components:
properties:
job_id:
type: string
+ task_id:
+ type: string
required:
+ - task_id
- job_id
type: object
JobStatus:
@@ -1124,6 +1158,25 @@ components:
- provider_id
- type
type: object
+ LLMAsJudgeScoringFnParams:
+ additionalProperties: false
+ properties:
+ judge_model:
+ type: string
+ judge_score_regexes:
+ items:
+ type: string
+ type: array
+ prompt_template:
+ type: string
+ type:
+ const: llm_as_judge
+ default: llm_as_judge
+ type: string
+ required:
+ - type
+ - judge_model
+ type: object
LogEventRequest:
additionalProperties: false
properties:
@@ -1442,10 +1495,15 @@ components:
type: object
provider_id:
type: string
+ type:
+ const: model
+ default: model
+ type: string
required:
- identifier
- llama_model
- metadata
+ - type
- provider_id
type: object
OptimizerConfig:
@@ -1492,109 +1550,6 @@ components:
- rows
- total_count
type: object
- Parameter:
- additionalProperties: false
- properties:
- description:
- type: string
- name:
- type: string
- type:
- oneOf:
- - additionalProperties: false
- properties:
- type:
- const: string
- default: string
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: number
- default: number
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: boolean
- default: boolean
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: array
- default: array
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: object
- default: object
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: json
- default: json
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: union
- default: union
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: chat_completion_input
- default: chat_completion_input
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: completion_input
- default: completion_input
- type: string
- required:
- - type
- type: object
- - additionalProperties: false
- properties:
- type:
- const: agent_turn_input
- default: agent_turn_input
- type: string
- required:
- - type
- type: object
- required:
- - name
- - type
- type: object
PhotogenToolDefinition:
additionalProperties: false
properties:
@@ -1844,6 +1799,20 @@ components:
enum:
- dpo
type: string
+ RegexParserScoringFnParams:
+ additionalProperties: false
+ properties:
+ parsing_regexes:
+ items:
+ type: string
+ type: array
+ type:
+ const: regex_parser
+ default: regex_parser
+ type: string
+ required:
+ - type
+ type: object
RegisterDatasetRequest:
additionalProperties: false
properties:
@@ -1952,9 +1921,24 @@ components:
- method
- provider_types
type: object
+ RunEvalRequest:
+ additionalProperties: false
+ properties:
+ task_config:
+ oneOf:
+ - $ref: '#/components/schemas/BenchmarkEvalTaskConfig'
+ - $ref: '#/components/schemas/AppEvalTaskConfig'
+ task_id:
+ type: string
+ required:
+ - task_id
+ - task_config
+ type: object
RunShieldRequest:
additionalProperties: false
properties:
+ identifier:
+ type: string
messages:
items:
oneOf:
@@ -1973,10 +1957,8 @@ components:
- type: array
- type: object
type: object
- shield_type:
- type: string
required:
- - shield_type
+ - identifier
- messages
- params
type: object
@@ -2045,9 +2027,13 @@ components:
save_results_dataset:
type: boolean
scoring_functions:
- items:
- type: string
- type: array
+ additionalProperties:
+ oneOf:
+ - oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
+ - type: 'null'
+ type: object
required:
- dataset_id
- scoring_functions
@@ -2081,9 +2067,13 @@ components:
type: object
type: array
scoring_functions:
- items:
- type: string
- type: array
+ additionalProperties:
+ oneOf:
+ - oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
+ - type: 'null'
+ type: object
required:
- input_rows
- scoring_functions
@@ -2101,20 +2091,6 @@ components:
ScoringFnDefWithProvider:
additionalProperties: false
properties:
- context:
- additionalProperties: false
- properties:
- judge_model:
- type: string
- judge_score_regex:
- items:
- type: string
- type: array
- prompt_template:
- type: string
- required:
- - judge_model
- type: object
description:
type: string
identifier:
@@ -2129,10 +2105,10 @@ components:
- type: array
- type: object
type: object
- parameters:
- items:
- $ref: '#/components/schemas/Parameter'
- type: array
+ params:
+ oneOf:
+ - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+ - $ref: '#/components/schemas/RegexParserScoringFnParams'
provider_id:
type: string
return_type:
@@ -2227,11 +2203,15 @@ components:
required:
- type
type: object
+ type:
+ const: scoring_fn
+ default: scoring_fn
+ type: string
required:
- identifier
- metadata
- - parameters
- return_type
+ - type
- provider_id
type: object
ScoringResult:
@@ -2361,12 +2341,17 @@ components:
type: object
provider_id:
type: string
+ shield_type:
+ type: string
type:
+ const: shield
+ default: shield
type: string
required:
- identifier
- - type
+ - shield_type
- params
+ - type
- provider_id
type: object
SpanEndPayload:
@@ -2998,7 +2983,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-10-31 14:28:52.128905"
+ \ draft and subject to change.\n Generated at 2024-11-07 22:26:27.169134"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3387,7 +3372,7 @@ paths:
description: OK
tags:
- Datasets
- /eval/evaluate:
+ /eval/evaluate_rows:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
@@ -3401,7 +3386,7 @@ paths:
content:
application/json:
schema:
- $ref: '#/components/schemas/EvaluateRequest'
+ $ref: '#/components/schemas/EvaluateRowsRequest'
required: true
responses:
'200':
@@ -3412,31 +3397,6 @@ paths:
description: OK
tags:
- Eval
- /eval/evaluate_batch:
- post:
- parameters:
- - description: JSON-encoded provider data which will be made available to the
- adapter servicing the API
- in: header
- name: X-LlamaStack-ProviderData
- required: false
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/EvaluateBatchRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/Job'
- description: OK
- tags:
- - Eval
/eval/job/cancel:
post:
parameters:
@@ -3461,6 +3421,11 @@ paths:
/eval/job/result:
get:
parameters:
+ - in: query
+ name: task_id
+ required: true
+ schema:
+ type: string
- in: query
name: job_id
required: true
@@ -3485,6 +3450,11 @@ paths:
/eval/job/status:
get:
parameters:
+ - in: query
+ name: task_id
+ required: true
+ schema:
+ type: string
- in: query
name: job_id
required: true
@@ -3508,6 +3478,31 @@ paths:
description: OK
tags:
- Eval
+ /eval/run_eval:
+ post:
+ parameters:
+ - description: JSON-encoded provider data which will be made available to the
+ adapter servicing the API
+ in: header
+ name: X-LlamaStack-ProviderData
+ required: false
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/RunEvalRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Job'
+ description: OK
+ tags:
+ - Eval
/health:
get:
parameters:
@@ -4143,7 +4138,7 @@ paths:
get:
parameters:
- in: query
- name: shield_type
+ name: identifier
required: true
schema:
type: string
@@ -4280,23 +4275,23 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: Memory
-- name: Inference
-- name: Eval
+- name: ScoringFunctions
+- name: Agents
+- name: Shields
+- name: Telemetry
+- name: Safety
- name: MemoryBanks
- name: Models
- name: BatchInference
- name: PostTraining
-- name: Agents
-- name: Shields
-- name: Telemetry
-- name: Inspect
+- name: Inference
+- name: Datasets
- name: DatasetIO
- name: SyntheticDataGeneration
-- name: Datasets
+- name: Memory
+- name: Eval
+- name: Inspect
- name: Scoring
-- name: ScoringFunctions
-- name: Safety
- description:
name: BuiltinTool
- description:
name: AgentCandidate
+- description:
+ name: AppEvalTaskConfig
+- description:
+ name: BenchmarkEvalTaskConfig
+- description:
+ name: LLMAsJudgeScoringFnParams
- description:
name: ModelCandidate
-- description:
- name: EvaluateRequest
+ name: RegexParserScoringFnParams
+- description:
+ name: EvaluateRowsRequest
- description:
name: EvaluateResponse
- description:
name: ScoringResult
-- description:
- name: EvaluateBatchRequest
-- description:
- name: Job
- description:
name: GetAgentsSessionRequest
@@ -4550,8 +4552,6 @@ tags:
- description:
name: PaginatedRowsResult
-- description:
- name: Parameter
- description:
name: ScoringFnDefWithProvider
@@ -4659,6 +4659,10 @@ tags:
- description:
name: RegisterShieldRequest
+- description:
+ name: RunEvalRequest
+- description:
+ name: Job
- description:
name: RunShieldRequest
@@ -4734,11 +4738,13 @@ x-tagGroups:
- AgentTurnResponseStreamChunk
- AgentTurnResponseTurnCompletePayload
- AgentTurnResponseTurnStartPayload
+ - AppEvalTaskConfig
- Attachment
- BatchChatCompletionRequest
- BatchChatCompletionResponse
- BatchCompletionRequest
- BatchCompletionResponse
+ - BenchmarkEvalTaskConfig
- BuiltinTool
- CancelTrainingJobRequest
- ChatCompletionRequest
@@ -4762,9 +4768,8 @@ x-tagGroups:
- DoraFinetuningConfig
- EmbeddingsRequest
- EmbeddingsResponse
- - EvaluateBatchRequest
- - EvaluateRequest
- EvaluateResponse
+ - EvaluateRowsRequest
- FinetuningAlgorithm
- FunctionCallToolDefinition
- GetAgentsSessionRequest
@@ -4778,6 +4783,7 @@ x-tagGroups:
- JobStatus
- KeyValueMemoryBankDef
- KeywordMemoryBankDef
+ - LLMAsJudgeScoringFnParams
- LogEventRequest
- LogSeverity
- LoraFinetuningConfig
@@ -4789,7 +4795,6 @@ x-tagGroups:
- ModelDefWithProvider
- OptimizerConfig
- PaginatedRowsResult
- - Parameter
- PhotogenToolDefinition
- PostTrainingJob
- PostTrainingJobArtifactsResponse
@@ -4802,6 +4807,7 @@ x-tagGroups:
- QueryDocumentsRequest
- QueryDocumentsResponse
- RLHFAlgorithm
+ - RegexParserScoringFnParams
- RegisterDatasetRequest
- RegisterMemoryBankRequest
- RegisterModelRequest
@@ -4810,6 +4816,7 @@ x-tagGroups:
- RestAPIExecutionConfig
- RestAPIMethod
- RouteInfo
+ - RunEvalRequest
- RunShieldRequest
- RunShieldResponse
- SafetyViolation