This commit is contained in:
Sai Soundararaj 2025-07-01 17:03:45 -07:00
parent 78ef9c605f
commit bcdb6fcc15
4 changed files with 26 additions and 8 deletions

View file

@@ -234,7 +234,7 @@ Before finalizing documentation, verify:
[x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
[x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
[x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
[x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation

View file

@@ -9797,18 +9797,20 @@
"tool",
"tool_group"
],
"title": "ResourceType",
"const": "benchmark",
"default": "benchmark"
"default": "benchmark",
"description": "The resource type, always benchmark"
},
"dataset_id": {
"type": "string"
"type": "string",
"description": "Identifier of the dataset to use for the benchmark evaluation"
},
"scoring_functions": {
"type": "array",
"items": {
"type": "string"
}
},
"description": "List of scoring function identifiers to apply during evaluation"
},
"metadata": {
"type": "object",
@@ -9833,7 +9835,8 @@
"type": "object"
}
]
}
},
"description": "Metadata for this evaluation task"
}
},
"additionalProperties": false,
@@ -9845,7 +9848,8 @@
"scoring_functions",
"metadata"
],
"title": "Benchmark"
"title": "Benchmark",
"description": "A benchmark resource for evaluating model performance."
},
"OpenAIAssistantMessageParam": {
"type": "object",

View file

@@ -7037,15 +7037,19 @@ components:
- benchmark
- tool
- tool_group
title: ResourceType
const: benchmark
default: benchmark
description: The resource type, always benchmark
dataset_id:
type: string
description: >-
Identifier of the dataset to use for the benchmark evaluation
scoring_functions:
type: array
items:
type: string
description: >-
List of scoring function identifiers to apply during evaluation
metadata:
type: object
additionalProperties:
@@ -7056,6 +7060,7 @@ components:
- type: string
- type: array
- type: object
description: Metadata for this evaluation task
additionalProperties: false
required:
- identifier
@@ -7065,6 +7070,8 @@ components:
- scoring_functions
- metadata
title: Benchmark
description: >-
A benchmark resource for evaluating model performance.
OpenAIAssistantMessageParam:
type: object
properties:

View file

@@ -22,6 +22,13 @@ class CommonBenchmarkFields(BaseModel):
@json_schema_type
class Benchmark(CommonBenchmarkFields, Resource):
"""A benchmark resource for evaluating model performance.
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
:param scoring_functions: List of scoring function identifiers to apply during evaluation
:param metadata: Metadata for this evaluation task
:param type: The resource type, always benchmark
"""
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
@property