commit bcdb6fcc15 (parent 78ef9c605f)
4 changed files with 26 additions and 8 deletions
@@ -234,7 +234,7 @@ Before finalizing documentation, verify:
 [x] 13. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/eval/eval.py` - Evaluation framework
 [x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 [x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
-16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
+[x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
docs/_static/llama-stack-spec.html (vendored): 16 changed lines
@@ -9797,18 +9797,20 @@
                     "tool",
                     "tool_group"
                 ],
-                "title": "ResourceType",
                 "const": "benchmark",
-                "default": "benchmark"
+                "default": "benchmark",
+                "description": "The resource type, always benchmark"
             },
             "dataset_id": {
-                "type": "string"
+                "type": "string",
+                "description": "Identifier of the dataset to use for the benchmark evaluation"
             },
             "scoring_functions": {
                 "type": "array",
                 "items": {
                     "type": "string"
-                }
+                },
+                "description": "List of scoring function identifiers to apply during evaluation"
             },
             "metadata": {
                 "type": "object",
@@ -9833,7 +9835,8 @@
                             "type": "object"
                         }
                     ]
-                }
+                },
+                "description": "Metadata for this evaluation task"
             }
         },
         "additionalProperties": false,
@@ -9845,7 +9848,8 @@
                 "scoring_functions",
                 "metadata"
             ],
-            "title": "Benchmark"
+            "title": "Benchmark",
+            "description": "A benchmark resource for evaluating model performance."
         },
         "OpenAIAssistantMessageParam": {
             "type": "object",
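As a quick illustration (not part of the commit), the snippet below validates a hypothetical benchmark payload against a hand-copied subset of the Benchmark schema shown above, using the third-party jsonschema package. The payload values are invented, and treating dataset_id as required is an assumption; identifier, scoring_functions, and metadata do appear in the required lists in the diffs.

import jsonschema  # third-party: pip install jsonschema

# Hand-copied subset of the Benchmark schema from the hunks above.
benchmark_schema = {
    "type": "object",
    "properties": {
        "identifier": {"type": "string"},
        "type": {"const": "benchmark", "default": "benchmark",
                 "description": "The resource type, always benchmark"},
        "dataset_id": {"type": "string",
                       "description": "Identifier of the dataset to use for the benchmark evaluation"},
        "scoring_functions": {"type": "array", "items": {"type": "string"},
                              "description": "List of scoring function identifiers to apply during evaluation"},
        "metadata": {"type": "object",
                     "description": "Metadata for this evaluation task"},
    },
    # "dataset_id" being required is an assumption for this sketch.
    "required": ["identifier", "dataset_id", "scoring_functions", "metadata"],
}

payload = {  # hypothetical example values
    "identifier": "mmlu-benchmark",
    "type": "benchmark",
    "dataset_id": "mmlu",
    "scoring_functions": ["basic::subset_of"],
    "metadata": {},
}

jsonschema.validate(payload, benchmark_schema)  # raises ValidationError on mismatch
print("payload conforms to the Benchmark schema subset")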
docs/_static/llama-stack-spec.yaml (vendored): 9 changed lines
@@ -7037,15 +7037,19 @@ components:
           - benchmark
           - tool
           - tool_group
-        title: ResourceType
         const: benchmark
         default: benchmark
+        description: The resource type, always benchmark
       dataset_id:
         type: string
+        description: >-
+          Identifier of the dataset to use for the benchmark evaluation
       scoring_functions:
         type: array
         items:
           type: string
+        description: >-
+          List of scoring function identifiers to apply during evaluation
       metadata:
         type: object
         additionalProperties:
@@ -7056,6 +7060,7 @@ components:
             - type: string
             - type: array
             - type: object
+        description: Metadata for this evaluation task
       additionalProperties: false
       required:
         - identifier
@@ -7065,6 +7070,8 @@ components:
         - scoring_functions
         - metadata
       title: Benchmark
+      description: >-
+        A benchmark resource for evaluating model performance.
     OpenAIAssistantMessageParam:
       type: object
       properties:
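To spot-check the vendored YAML after a change like this, the new descriptions can be read back out. A minimal sketch, assuming PyYAML is installed and that the Benchmark schema sits at the standard OpenAPI components.schemas path:

import yaml  # third-party: pip install pyyaml

with open("docs/_static/llama-stack-spec.yaml") as f:
    spec = yaml.safe_load(f)

benchmark = spec["components"]["schemas"]["Benchmark"]
print(benchmark["description"])  # A benchmark resource for evaluating model performance.
print(benchmark["properties"]["dataset_id"]["description"])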
llama_stack/apis/benchmarks/benchmarks.py
@@ -22,6 +22,13 @@ class CommonBenchmarkFields(BaseModel):
 
 @json_schema_type
 class Benchmark(CommonBenchmarkFields, Resource):
+    """A benchmark resource for evaluating model performance.
+
+    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
+    :param scoring_functions: List of scoring function identifiers to apply during evaluation
+    :param metadata: Metadata for this evaluation task
+    :param type: The resource type, always benchmark
+    """
     type: Literal[ResourceType.benchmark] = ResourceType.benchmark
 
     @property
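The net effect of the commit is that the Sphinx-style :param: lines in this docstring surface as per-property description strings in both vendored spec files. As a hedged sketch (not the repo's actual generator, whose name and API are not shown here), the mapping can be done like this:

import re
from typing import Dict

# Matches lines of the form ":param name: description text".
PARAM_RE = re.compile(r"^\s*:param\s+(\w+):\s*(.+)$")

def param_descriptions(docstring: str) -> Dict[str, str]:
    """Collect {field_name: description} from Sphinx-style :param: lines."""
    out: Dict[str, str] = {}
    for line in docstring.splitlines():
        m = PARAM_RE.match(line)
        if m:
            out[m.group(1)] = m.group(2).strip()
    return out

def attach_descriptions(schema: dict, docstring: str) -> dict:
    """Copy parsed :param: text into schema["properties"][name]["description"]."""
    descs = param_descriptions(docstring)
    for name, prop in schema.get("properties", {}).items():
        if name in descs:
            prop["description"] = descs[name]
    return schema

# Tiny demonstration against a hand-written schema fragment.
schema = {"properties": {"dataset_id": {"type": "string"}}}
doc = """A benchmark resource for evaluating model performance.

    :param dataset_id: Identifier of the dataset to use for the benchmark evaluation
    """
print(attach_descriptions(schema, doc))
# {'properties': {'dataset_id': {'type': 'string', 'description': 'Identifier of ...'}}}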