commit bcdb6fcc15
parent 78ef9c605f
Author: Sai Soundararaj
Date:   2025-07-01 17:03:45 -07:00

4 changed files with 26 additions and 8 deletions

@@ -234,7 +234,7 @@ Before finalizing documentation, verify:
 [x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
 [x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 [x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
-16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
+[x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation

@@ -9797,18 +9797,20 @@
             "tool",
             "tool_group"
           ],
-          "title": "ResourceType",
           "const": "benchmark",
-          "default": "benchmark"
+          "default": "benchmark",
+          "description": "The resource type, always benchmark"
         },
         "dataset_id": {
-          "type": "string"
+          "type": "string",
+          "description": "Identifier of the dataset to use for the benchmark evaluation"
         },
         "scoring_functions": {
           "type": "array",
           "items": {
             "type": "string"
-          }
+          },
+          "description": "List of scoring function identifiers to apply during evaluation"
         },
         "metadata": {
           "type": "object",
@@ -9833,7 +9835,8 @@
                 "type": "object"
               }
             ]
-          }
+          },
+          "description": "Metadata for this evaluation task"
         }
       },
       "additionalProperties": false,
@@ -9845,7 +9848,8 @@
         "scoring_functions",
         "metadata"
       ],
-      "title": "Benchmark"
+      "title": "Benchmark",
+      "description": "A benchmark resource for evaluating model performance."
     },
     "OpenAIAssistantMessageParam": {
       "type": "object",

@@ -7037,15 +7037,19 @@ components:
             - benchmark
             - tool
             - tool_group
-          title: ResourceType
           const: benchmark
           default: benchmark
+          description: The resource type, always benchmark
         dataset_id:
           type: string
+          description: >-
+            Identifier of the dataset to use for the benchmark evaluation
         scoring_functions:
           type: array
           items:
             type: string
+          description: >-
+            List of scoring function identifiers to apply during evaluation
         metadata:
           type: object
           additionalProperties:
@@ -7056,6 +7060,7 @@ components:
               - type: string
               - type: array
               - type: object
+          description: Metadata for this evaluation task
       additionalProperties: false
       required:
         - identifier
@@ -7065,6 +7070,8 @@ components:
         - scoring_functions
         - metadata
       title: Benchmark
+      description: >-
+        A benchmark resource for evaluating model performance.
     OpenAIAssistantMessageParam:
       type: object
       properties:
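
A note on the `>-` markers in the YAML hunks: they introduce folded block scalars, so the wrapped description text loads back as a single-line string. A quick sanity check with PyYAML (illustrative only, not part of this commit):

import yaml  # PyYAML

snippet = """
dataset_id:
  type: string
  description: >-
    Identifier of the dataset to use
    for the benchmark evaluation
"""
loaded = yaml.safe_load(snippet)
# '>-' folds the wrapped lines into one space-joined string, and the '-'
# strips the trailing newline.
assert loaded["dataset_id"]["description"] == (
    "Identifier of the dataset to use for the benchmark evaluation"
)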

@@ -22,6 +22,13 @@ class CommonBenchmarkFields(BaseModel):
 
 @json_schema_type
 class Benchmark(CommonBenchmarkFields, Resource):
+    """A benchmark resource for evaluating model performance.
+    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
+    :param scoring_functions: List of scoring function identifiers to apply during evaluation
+    :param metadata: Metadata for this evaluation task
+    :param type: The resource type, always benchmark
+    """
+
     type: Literal[ResourceType.benchmark] = ResourceType.benchmark
 
     @property
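
The descriptions in the generated JSON and YAML specs match these :param lines one-to-one, which suggests the spec generator lifts field descriptions straight from the docstring. Below is a rough sketch of that kind of extraction; it assumes nothing about llama-stack's actual generator, and parse_param_descriptions is a made-up helper, not an API from the repo:

import re

# Docstring text copied from the Benchmark class above.
DOC = """A benchmark resource for evaluating model performance.
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
:param scoring_functions: List of scoring function identifiers to apply during evaluation
:param metadata: Metadata for this evaluation task
:param type: The resource type, always benchmark
"""

PARAM_RE = re.compile(r"^:param\s+(\w+):\s*(.+)$")

def parse_param_descriptions(doc: str) -> dict:
    # Map each field name to its description from Sphinx-style ':param' lines.
    params = {}
    for line in doc.splitlines():
        match = PARAM_RE.match(line.strip())
        if match:
            params[match.group(1)] = match.group(2).strip()
    return params

print(parse_param_descriptions(DOC)["dataset_id"])
# -> Identifier of the dataset to use for the benchmark evaluation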