forked from phoenix-oss/llama-stack-mirror
precommit
This commit is contained in:
parent
45f6d5cd08
commit
3f8c7a584a
8 changed files with 31 additions and 1037 deletions
|
@ -6,12 +6,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -23,7 +21,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -40,12 +37,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
"numpy",
|
||||
|
@ -56,7 +51,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -74,12 +68,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"fireworks-ai",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -91,7 +83,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -111,13 +102,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
"numpy",
|
||||
|
@ -128,7 +117,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -146,12 +134,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"fireworks-ai",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"litellm",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
|
@ -164,7 +150,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -183,13 +168,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"fireworks-ai",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -201,7 +184,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -218,12 +200,10 @@
|
|||
"blobfile",
|
||||
"chardet",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"litellm",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
|
@ -235,7 +215,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -252,13 +231,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -270,7 +247,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -287,13 +263,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -305,7 +279,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -324,13 +297,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"fairscale",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"lm-format-enforcer",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
|
@ -343,7 +314,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -364,14 +334,12 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"fairscale",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fbgemm-gpu",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"lm-format-enforcer",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
|
@ -384,7 +352,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -403,12 +370,10 @@
|
|||
"aiosqlite",
|
||||
"blobfile",
|
||||
"chardet",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"nltk",
|
||||
"numpy",
|
||||
|
@ -420,7 +385,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -437,12 +401,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -455,7 +417,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -471,11 +432,9 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"litellm",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
|
@ -488,7 +447,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -506,12 +464,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -523,7 +479,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -541,12 +496,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -559,7 +512,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -607,13 +559,11 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"huggingface_hub",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -625,7 +575,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -643,12 +592,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -660,7 +607,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
@ -679,12 +625,10 @@
|
|||
"chardet",
|
||||
"chromadb-client",
|
||||
"datasets",
|
||||
"emoji",
|
||||
"faiss-cpu",
|
||||
"fastapi",
|
||||
"fire",
|
||||
"httpx",
|
||||
"langdetect",
|
||||
"matplotlib",
|
||||
"mcp",
|
||||
"nltk",
|
||||
|
@ -696,7 +640,6 @@
|
|||
"psycopg2-binary",
|
||||
"pymongo",
|
||||
"pypdf",
|
||||
"pythainlp",
|
||||
"redis",
|
||||
"requests",
|
||||
"scikit-learn",
|
||||
|
|
542
docs/_static/llama-stack-spec.html
vendored
542
docs/_static/llama-stack-spec.html
vendored
|
@ -2285,7 +2285,7 @@
|
|||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Job"
|
||||
"$ref": "#/components/schemas/ListAgentSessionsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6192,382 +6192,6 @@
|
|||
"title": "EmbeddingsResponse",
|
||||
"description": "Response containing generated embeddings."
|
||||
},
|
||||
"AgentCandidate": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "agent",
|
||||
"default": "agent"
|
||||
},
|
||||
"config": {
|
||||
"$ref": "#/components/schemas/AgentConfig",
|
||||
"description": "The configuration for the agent candidate."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"config"
|
||||
],
|
||||
"title": "AgentCandidate",
|
||||
"description": "An agent candidate for evaluation."
|
||||
},
|
||||
"AggregationFunctionType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"average",
|
||||
"weighted_average",
|
||||
"median",
|
||||
"categorical_count",
|
||||
"accuracy"
|
||||
],
|
||||
"title": "AggregationFunctionType"
|
||||
},
|
||||
"BasicScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "basic",
|
||||
"default": "basic"
|
||||
},
|
||||
"aggregation_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "BasicScoringFnParams"
|
||||
},
|
||||
"BenchmarkConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"eval_candidate": {
|
||||
"$ref": "#/components/schemas/EvalCandidate",
|
||||
"description": "The candidate to evaluate."
|
||||
},
|
||||
"scoring_params": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "Map between scoring function id and parameters for each scoring function you want to run"
|
||||
},
|
||||
"num_examples": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) The number of examples to evaluate. If not provided, all examples in the dataset will be evaluated"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"eval_candidate",
|
||||
"scoring_params"
|
||||
],
|
||||
"title": "BenchmarkConfig",
|
||||
"description": "A benchmark configuration for evaluation."
|
||||
},
|
||||
"EvalCandidate": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ModelCandidate"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"model": "#/components/schemas/ModelCandidate",
|
||||
"agent": "#/components/schemas/AgentCandidate"
|
||||
}
|
||||
}
|
||||
},
|
||||
"LLMAsJudgeScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "llm_as_judge",
|
||||
"default": "llm_as_judge"
|
||||
},
|
||||
"judge_model": {
|
||||
"type": "string"
|
||||
},
|
||||
"prompt_template": {
|
||||
"type": "string"
|
||||
},
|
||||
"judge_score_regexes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"aggregation_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"judge_model"
|
||||
],
|
||||
"title": "LLMAsJudgeScoringFnParams"
|
||||
},
|
||||
"ModelCandidate": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "model",
|
||||
"default": "model"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model ID to evaluate."
|
||||
},
|
||||
"sampling_params": {
|
||||
"$ref": "#/components/schemas/SamplingParams",
|
||||
"description": "The sampling parameters for the model."
|
||||
},
|
||||
"system_message": {
|
||||
"$ref": "#/components/schemas/SystemMessage",
|
||||
"description": "(Optional) The system message providing instructions or context to the model."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"model",
|
||||
"sampling_params"
|
||||
],
|
||||
"title": "ModelCandidate",
|
||||
"description": "A model candidate for evaluation."
|
||||
},
|
||||
"RegexParserScoringFnParams": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "regex_parser",
|
||||
"default": "regex_parser"
|
||||
},
|
||||
"parsing_regexes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"aggregation_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AggregationFunctionType"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "RegexParserScoringFnParams"
|
||||
},
|
||||
"ScoringFnParams": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/LLMAsJudgeScoringFnParams"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/RegexParserScoringFnParams"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/BasicScoringFnParams"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"llm_as_judge": "#/components/schemas/LLMAsJudgeScoringFnParams",
|
||||
"regex_parser": "#/components/schemas/RegexParserScoringFnParams",
|
||||
"basic": "#/components/schemas/BasicScoringFnParams"
|
||||
}
|
||||
}
|
||||
},
|
||||
"EvaluateRowsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows to evaluate."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The scoring functions to use for the evaluation."
|
||||
},
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_rows",
|
||||
"scoring_functions",
|
||||
"benchmark_config"
|
||||
],
|
||||
"title": "EvaluateRowsRequest"
|
||||
},
|
||||
"EvaluateResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"generations": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The generations from the evaluation."
|
||||
},
|
||||
"scores": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
},
|
||||
"description": "The scores from the evaluation."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"generations",
|
||||
"scores"
|
||||
],
|
||||
"title": "EvaluateResponse",
|
||||
"description": "The response from an evaluation."
|
||||
},
|
||||
"ScoringResult": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"score_rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The scoring result for each row. Each row is a map of column name to value."
|
||||
},
|
||||
"aggregated_results": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Map of metric name to aggregated value"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"score_rows",
|
||||
"aggregated_results"
|
||||
],
|
||||
"title": "ScoringResult",
|
||||
"description": "A scoring result for a single row."
|
||||
},
|
||||
"Agent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -7705,7 +7329,8 @@
|
|||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled"
|
||||
"scheduled",
|
||||
"cancelled"
|
||||
],
|
||||
"title": "JobStatus"
|
||||
},
|
||||
|
@ -8400,30 +8025,6 @@
|
|||
"title": "IterrowsResponse",
|
||||
"description": "A paginated list of rows from a dataset."
|
||||
},
|
||||
"Job": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"job_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"in_progress",
|
||||
"failed",
|
||||
"scheduled"
|
||||
],
|
||||
"title": "JobStatus"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"job_id",
|
||||
"status"
|
||||
],
|
||||
"title": "Job"
|
||||
},
|
||||
"ListAgentSessionsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -10007,16 +9608,21 @@
|
|||
"RunRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"benchmark_config": {
|
||||
"$ref": "#/components/schemas/BenchmarkConfig",
|
||||
"description": "The configuration for the benchmark."
|
||||
"task": {
|
||||
"$ref": "#/components/schemas/EvaluationTask",
|
||||
"description": "The task to evaluate. To specify a task, one of the following must be provided: - `benchmark_id`: Run evaluation task against a benchmark_id - `dataset_id` and `grader_ids`: Run evaluation task against a dataset_id and a list of grader_ids - `data_source` and `grader_ids`: Run evaluation task against a data source (e.g. rows, uri, etc.) and a list of grader_ids"
|
||||
},
|
||||
"candidate": {
|
||||
"$ref": "#/components/schemas/EvaluationCandidate",
|
||||
"description": "The candidate to evaluate."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"benchmark_config"
|
||||
"task",
|
||||
"candidate"
|
||||
],
|
||||
"title": "RunEvalRequest"
|
||||
"title": "RunRequest"
|
||||
},
|
||||
"RunShieldRequest": {
|
||||
"type": "object",
|
||||
|
@ -10123,128 +9729,6 @@
|
|||
],
|
||||
"title": "SaveSpansToDatasetRequest"
|
||||
},
|
||||
"ScoreRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_rows": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "The rows to score."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "The scoring functions to use for the scoring."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_rows",
|
||||
"scoring_functions"
|
||||
],
|
||||
"title": "ScoreRequest"
|
||||
},
|
||||
"ScoreResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"results": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
},
|
||||
"description": "A map of scoring function name to ScoringResult."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"results"
|
||||
],
|
||||
"title": "ScoreResponse",
|
||||
"description": "The response from scoring."
|
||||
},
|
||||
"ScoreBatchRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"save_results_dataset": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"dataset_id",
|
||||
"scoring_functions",
|
||||
"save_results_dataset"
|
||||
],
|
||||
"title": "ScoreBatchRequest"
|
||||
},
|
||||
"ScoreBatchResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"results": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/ScoringResult"
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"results"
|
||||
],
|
||||
"title": "ScoreBatchResponse"
|
||||
},
|
||||
"AlgorithmConfig": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
452
docs/_static/llama-stack-spec.yaml
vendored
452
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1562,109 +1562,6 @@ paths:
|
|||
required: false
|
||||
schema:
|
||||
type: integer
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The status of the evaluationjob.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: Get the status of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the status of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: Cancel a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The result of the job.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/EvaluateResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Eval
|
||||
description: Get the result of a job.
|
||||
parameters:
|
||||
- name: benchmark_id
|
||||
in: path
|
||||
description: >-
|
||||
The ID of the benchmark to run the evaluation on.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: job_id
|
||||
in: path
|
||||
description: The ID of the job to get the result of.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
/v1/agents/{agent_id}/sessions:
|
||||
get:
|
||||
responses:
|
||||
|
@ -1923,7 +1820,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Providers
|
||||
- Models
|
||||
description: ''
|
||||
parameters: []
|
||||
post:
|
||||
|
@ -1974,7 +1871,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inspect
|
||||
- Providers
|
||||
description: ''
|
||||
parameters: []
|
||||
/v1/inspect/routes:
|
||||
|
@ -4448,252 +4345,6 @@ components:
|
|||
title: EmbeddingsResponse
|
||||
description: >-
|
||||
Response containing generated embeddings.
|
||||
AgentCandidate:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: agent
|
||||
default: agent
|
||||
config:
|
||||
$ref: '#/components/schemas/AgentConfig'
|
||||
description: >-
|
||||
The configuration for the agent candidate.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- config
|
||||
title: AgentCandidate
|
||||
description: An agent candidate for evaluation.
|
||||
AggregationFunctionType:
|
||||
type: string
|
||||
enum:
|
||||
- average
|
||||
- weighted_average
|
||||
- median
|
||||
- categorical_count
|
||||
- accuracy
|
||||
title: AggregationFunctionType
|
||||
BasicScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: basic
|
||||
default: basic
|
||||
aggregation_functions:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AggregationFunctionType'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: BasicScoringFnParams
|
||||
BenchmarkConfig:
|
||||
type: object
|
||||
properties:
|
||||
eval_candidate:
|
||||
$ref: '#/components/schemas/EvalCandidate'
|
||||
description: The candidate to evaluate.
|
||||
scoring_params:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
description: >-
|
||||
Map between scoring function id and parameters for each scoring function
|
||||
you want to run
|
||||
num_examples:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of examples to evaluate. If not provided, all examples
|
||||
in the dataset will be evaluated
|
||||
additionalProperties: false
|
||||
required:
|
||||
- eval_candidate
|
||||
- scoring_params
|
||||
title: BenchmarkConfig
|
||||
description: >-
|
||||
A benchmark configuration for evaluation.
|
||||
EvalCandidate:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelCandidate'
|
||||
- $ref: '#/components/schemas/AgentCandidate'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
model: '#/components/schemas/ModelCandidate'
|
||||
agent: '#/components/schemas/AgentCandidate'
|
||||
LLMAsJudgeScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: llm_as_judge
|
||||
default: llm_as_judge
|
||||
judge_model:
|
||||
type: string
|
||||
prompt_template:
|
||||
type: string
|
||||
judge_score_regexes:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
aggregation_functions:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AggregationFunctionType'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- judge_model
|
||||
title: LLMAsJudgeScoringFnParams
|
||||
ModelCandidate:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
model:
|
||||
type: string
|
||||
description: The model ID to evaluate.
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
description: The sampling parameters for the model.
|
||||
system_message:
|
||||
$ref: '#/components/schemas/SystemMessage'
|
||||
description: >-
|
||||
(Optional) The system message providing instructions or context to the
|
||||
model.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- model
|
||||
- sampling_params
|
||||
title: ModelCandidate
|
||||
description: A model candidate for evaluation.
|
||||
RegexParserScoringFnParams:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: regex_parser
|
||||
default: regex_parser
|
||||
parsing_regexes:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
aggregation_functions:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/AggregationFunctionType'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: RegexParserScoringFnParams
|
||||
ScoringFnParams:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
|
||||
- $ref: '#/components/schemas/RegexParserScoringFnParams'
|
||||
- $ref: '#/components/schemas/BasicScoringFnParams'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
|
||||
regex_parser: '#/components/schemas/RegexParserScoringFnParams'
|
||||
basic: '#/components/schemas/BasicScoringFnParams'
|
||||
EvaluateRowsRequest:
|
||||
type: object
|
||||
properties:
|
||||
input_rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows to evaluate.
|
||||
scoring_functions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
description: >-
|
||||
The scoring functions to use for the evaluation.
|
||||
benchmark_config:
|
||||
$ref: '#/components/schemas/BenchmarkConfig'
|
||||
description: The configuration for the benchmark.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_rows
|
||||
- scoring_functions
|
||||
- benchmark_config
|
||||
title: EvaluateRowsRequest
|
||||
EvaluateResponse:
|
||||
type: object
|
||||
properties:
|
||||
generations:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The generations from the evaluation.
|
||||
scores:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: The scores from the evaluation.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- generations
|
||||
- scores
|
||||
title: EvaluateResponse
|
||||
description: The response from an evaluation.
|
||||
ScoringResult:
|
||||
type: object
|
||||
properties:
|
||||
score_rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
The scoring result for each row. Each row is a map of column name to value.
|
||||
aggregated_results:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: Map of metric name to aggregated value
|
||||
additionalProperties: false
|
||||
required:
|
||||
- score_rows
|
||||
- aggregated_results
|
||||
title: ScoringResult
|
||||
description: A scoring result for a single row.
|
||||
Agent:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -5451,6 +5102,7 @@ components:
|
|||
- in_progress
|
||||
- failed
|
||||
- scheduled
|
||||
- cancelled
|
||||
title: JobStatus
|
||||
scheduled_at:
|
||||
type: string
|
||||
|
@ -5901,24 +5553,6 @@ components:
|
|||
- data
|
||||
title: IterrowsResponse
|
||||
description: A paginated list of rows from a dataset.
|
||||
Job:
|
||||
type: object
|
||||
properties:
|
||||
job_id:
|
||||
type: string
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- completed
|
||||
- in_progress
|
||||
- failed
|
||||
- scheduled
|
||||
title: JobStatus
|
||||
additionalProperties: false
|
||||
required:
|
||||
- job_id
|
||||
- status
|
||||
title: Job
|
||||
ListAgentSessionsResponse:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6984,8 +6618,9 @@ components:
|
|||
description: The candidate to evaluate.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- benchmark_config
|
||||
title: RunEvalRequest
|
||||
- task
|
||||
- candidate
|
||||
title: RunRequest
|
||||
RunShieldRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -7058,81 +6693,6 @@ components:
|
|||
- attributes_to_save
|
||||
- dataset_id
|
||||
title: SaveSpansToDatasetRequest
|
||||
ScoreRequest:
|
||||
type: object
|
||||
properties:
|
||||
input_rows:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: The rows to score.
|
||||
scoring_functions:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ScoringFnParams'
|
||||
- type: 'null'
|
||||
description: >-
|
||||
The scoring functions to use for the scoring.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_rows
|
||||
- scoring_functions
|
||||
title: ScoreRequest
|
||||
ScoreResponse:
|
||||
type: object
|
||||
properties:
|
||||
results:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
description: >-
|
||||
A map of scoring function name to ScoringResult.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- results
|
||||
title: ScoreResponse
|
||||
description: The response from scoring.
|
||||
ScoreBatchRequest:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
scoring_functions:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ScoringFnParams'
|
||||
- type: 'null'
|
||||
save_results_dataset:
|
||||
type: boolean
|
||||
additionalProperties: false
|
||||
required:
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
- save_results_dataset
|
||||
title: ScoreBatchRequest
|
||||
ScoreBatchResponse:
|
||||
type: object
|
||||
properties:
|
||||
dataset_id:
|
||||
type: string
|
||||
results:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ScoringResult'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- results
|
||||
title: ScoreBatchResponse
|
||||
AlgorithmConfig:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/LoraFinetuningConfig'
|
||||
|
|
|
@ -141,4 +141,3 @@ class Eval(Protocol):
|
|||
:param job_id: The ID of the job to get the result of.
|
||||
:return: The result of the job.
|
||||
"""
|
||||
|
||||
|
|
|
@ -146,4 +146,3 @@ class ScoringFunctions(Protocol):
|
|||
provider_id: Optional[str] = None,
|
||||
params: Optional[ScoringFnParams] = None,
|
||||
) -> None: ...
|
||||
|
||||
|
|
|
@ -43,7 +43,6 @@ from llama_stack.distribution.datatypes import (
|
|||
RoutableObject,
|
||||
RoutableObjectWithProvider,
|
||||
RoutedProtocol,
|
||||
ScoringFnWithACL,
|
||||
ShieldWithACL,
|
||||
ToolGroupWithACL,
|
||||
ToolWithACL,
|
||||
|
|
|
@ -26,4 +26,3 @@ def available_providers() -> List[ProviderSpec]:
|
|||
],
|
||||
),
|
||||
]
|
||||
|
||||
|
|
|
@ -166,7 +166,18 @@ datasets:
|
|||
uri: huggingface://datasets/llamastack/bfcl_v3?split=train
|
||||
metadata: {}
|
||||
dataset_id: bfcl
|
||||
provider_id: huggingface
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/IfEval?split=train
|
||||
metadata: {}
|
||||
dataset_id: ifeval
|
||||
- purpose: eval/messages-answer
|
||||
source:
|
||||
type: uri
|
||||
uri: huggingface://datasets/llamastack/docvqa?split=val
|
||||
metadata: {}
|
||||
dataset_id: docvqa
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue