Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 15:23:51 +00:00)

Commit bcdb6fcc15 (parent 78ef9c605f)
4 changed files with 26 additions and 8 deletions
@@ -234,7 +234,7 @@ Before finalizing documentation, verify:
 [x] 13. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py - Evaluation framework
 [x] 14. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py - Scoring system
 [x] 15. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py - Scoring function definitions
-16. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py - Benchmarking framework
+[x] 16. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py - Benchmarking framework
 17. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py - Safety shields
 18. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py - Batch inference operations
 19. /Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py - Data generation
docs/_static/llama-stack-spec.html (vendored, 16 lines changed)
@@ -9797,18 +9797,20 @@
             "tool",
             "tool_group"
           ],
           "title": "ResourceType",
           "const": "benchmark",
-          "default": "benchmark"
+          "default": "benchmark",
+          "description": "The resource type, always benchmark"
         },
         "dataset_id": {
-          "type": "string"
+          "type": "string",
+          "description": "Identifier of the dataset to use for the benchmark evaluation"
         },
         "scoring_functions": {
           "type": "array",
           "items": {
             "type": "string"
-          }
+          },
+          "description": "List of scoring function identifiers to apply during evaluation"
         },
         "metadata": {
           "type": "object",
@@ -9833,7 +9835,8 @@
                 "type": "object"
               }
             ]
-          }
+          },
+          "description": "Metadata for this evaluation task"
         }
       },
       "additionalProperties": false,
@@ -9845,7 +9848,8 @@
         "scoring_functions",
         "metadata"
       ],
-      "title": "Benchmark"
+      "title": "Benchmark",
+      "description": "A benchmark resource for evaluating model performance."
     },
     "OpenAIAssistantMessageParam": {
       "type": "object",
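The added "description" strings are ordinary JSON Schema annotations, so the extended schema can still be exercised mechanically. Below is a minimal Python sketch (not part of this commit) that validates a sample object against a trimmed copy of the Benchmark schema using the jsonschema package; the field names and description texts come from the diff above, while the sample values are invented.

import jsonschema

# Trimmed copy of the Benchmark schema from the diff above; only the fields
# touched by this commit are reproduced here.
benchmark_schema = {
    "type": "object",
    "properties": {
        "type": {
            "const": "benchmark",
            "default": "benchmark",
            "description": "The resource type, always benchmark",
        },
        "dataset_id": {
            "type": "string",
            "description": "Identifier of the dataset to use for the benchmark evaluation",
        },
        "scoring_functions": {
            "type": "array",
            "items": {"type": "string"},
            "description": "List of scoring function identifiers to apply during evaluation",
        },
        "metadata": {
            "type": "object",
            "description": "Metadata for this evaluation task",
        },
    },
    "required": ["dataset_id", "scoring_functions", "metadata"],
}

# Hypothetical instance; the dataset and scoring-function names are invented.
sample = {
    "type": "benchmark",
    "dataset_id": "mmlu",
    "scoring_functions": ["basic::subset_of"],
    "metadata": {},
}

jsonschema.validate(instance=sample, schema=benchmark_schema)  # raises on mismatch
print("sample conforms to the trimmed Benchmark schema")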
docs/_static/llama-stack-spec.yaml (vendored, 9 lines changed)
@@ -7037,15 +7037,19 @@ components:
           - benchmark
           - tool
           - tool_group
         title: ResourceType
         const: benchmark
         default: benchmark
+        description: The resource type, always benchmark
       dataset_id:
         type: string
+        description: >-
+          Identifier of the dataset to use for the benchmark evaluation
       scoring_functions:
         type: array
         items:
           type: string
+        description: >-
+          List of scoring function identifiers to apply during evaluation
       metadata:
         type: object
         additionalProperties:
@@ -7056,6 +7060,7 @@ components:
           - type: string
           - type: array
           - type: object
+        description: Metadata for this evaluation task
       additionalProperties: false
       required:
         - identifier
@@ -7065,6 +7070,8 @@ components:
         - scoring_functions
         - metadata
       title: Benchmark
+      description: >-
+        A benchmark resource for evaluating model performance.
     OpenAIAssistantMessageParam:
       type: object
       properties:
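Because the YAML spec is data rather than code, the new annotations can also be read back programmatically. A small sketch, assuming the standard OpenAPI components/schemas layout and a working directory at the repository root:

import yaml

# Load the vendored spec; requires PyYAML and a llama-stack checkout.
with open("docs/_static/llama-stack-spec.yaml") as f:
    spec = yaml.safe_load(f)

benchmark = spec["components"]["schemas"]["Benchmark"]
print(benchmark["description"])  # A benchmark resource for evaluating model performance.
print(benchmark["properties"]["dataset_id"]["description"])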
|
@ -22,6 +22,13 @@ class CommonBenchmarkFields(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class Benchmark(CommonBenchmarkFields, Resource):
|
||||
"""A benchmark resource for evaluating model performance.
|
||||
|
||||
:param dataset_id: Identifier of the dataset to use for the benchmark evaluation
|
||||
:param scoring_functions: List of scoring function identifiers to apply during evaluation
|
||||
:param metadata: Metadata for this evaluation task
|
||||
:param type: The resource type, always benchmark
|
||||
"""
|
||||
type: Literal[ResourceType.benchmark] = ResourceType.benchmark
|
||||
|
||||
@property
|
||||
|
|
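For context, here is a hedged sketch of how the documented fields fit together when the class is instantiated. The identifier and provider values are hypothetical, and the keyword arguments inherited from the Resource base class are assumptions rather than something shown in this hunk:

from llama_stack.apis.benchmarks.benchmarks import Benchmark

bench = Benchmark(
    identifier="mmlu-benchmark",             # hypothetical resource identifier
    provider_id="meta-reference",            # hypothetical provider (assumed Resource field)
    provider_resource_id="mmlu-benchmark",   # assumed Resource field
    dataset_id="mmlu",                       # dataset to evaluate against
    scoring_functions=["basic::subset_of"],  # scoring functions applied during evaluation
    metadata={"split": "validation"},        # free-form metadata for this evaluation task
)

assert bench.type.value == "benchmark"  # fixed by the Literal[ResourceType.benchmark] default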