mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-07 19:12:09 +00:00
include benchmarks
This commit is contained in:
parent
e68e8c96ae
commit
b4d868a1e5
3 changed files with 70 additions and 35 deletions
45
docs/_static/llama-stack-spec.html
vendored
45
docs/_static/llama-stack-spec.html
vendored
|
@ -2426,7 +2426,14 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK"
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Benchmark"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
|
@ -2444,7 +2451,7 @@
|
|||
"tags": [
|
||||
"Benchmarks"
|
||||
],
|
||||
"description": "",
|
||||
"description": "Register a new benchmark.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
|
@ -7098,13 +7105,15 @@
|
|||
"default": "benchmark"
|
||||
},
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset to use to run the benchmark."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "The scoring functions with parameters to use for this benchmark."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -7129,7 +7138,8 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "Metadata for this benchmark for additional descriptions."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
@ -9448,23 +9458,20 @@
|
|||
"RegisterBenchmarkRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"benchmark_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"dataset_id": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"description": "The ID of the dataset to use to run the benchmark."
|
||||
},
|
||||
"scoring_functions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
"$ref": "#/components/schemas/ScoringFnParams"
|
||||
},
|
||||
"description": "The scoring functions with parameters to use for this benchmark."
|
||||
},
|
||||
"provider_benchmark_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"provider_id": {
|
||||
"type": "string"
|
||||
"benchmark_id": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The ID of the benchmark to register. If not provided, a random ID will be generated."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
|
@ -9489,12 +9496,12 @@
|
|||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"description": "(Optional) Metadata for this benchmark for additional descriptions."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"benchmark_id",
|
||||
"dataset_id",
|
||||
"scoring_functions"
|
||||
],
|
||||
|
|
32
docs/_static/llama-stack-spec.yaml
vendored
32
docs/_static/llama-stack-spec.yaml
vendored
|
@ -1635,6 +1635,10 @@ paths:
|
|||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Benchmark'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
@ -1647,7 +1651,7 @@ paths:
|
|||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Benchmarks
|
||||
description: ''
|
||||
description: Register a new benchmark.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
|
@ -4950,10 +4954,14 @@ components:
|
|||
default: benchmark
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to use to run the benchmark.
|
||||
scoring_functions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
description: >-
|
||||
The scoring functions with parameters to use for this benchmark.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -4964,6 +4972,8 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
Metadata for this benchmark for additional descriptions.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- identifier
|
||||
|
@ -6438,18 +6448,21 @@ components:
|
|||
RegisterBenchmarkRequest:
|
||||
type: object
|
||||
properties:
|
||||
benchmark_id:
|
||||
type: string
|
||||
dataset_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of the dataset to use to run the benchmark.
|
||||
scoring_functions:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
provider_benchmark_id:
|
||||
type: string
|
||||
provider_id:
|
||||
$ref: '#/components/schemas/ScoringFnParams'
|
||||
description: >-
|
||||
The scoring functions with parameters to use for this benchmark.
|
||||
benchmark_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The ID of the benchmark to register. If not provided, a random
|
||||
ID will be generated.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
|
@ -6460,9 +6473,10 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: >-
|
||||
(Optional) Metadata for this benchmark for additional descriptions.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- benchmark_id
|
||||
- dataset_id
|
||||
- scoring_functions
|
||||
title: RegisterBenchmarkRequest
|
||||
|
|
|
@ -8,15 +8,22 @@ from typing import Any, Dict, List, Literal, Optional, Protocol, runtime_checkab
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
from llama_stack.apis.resource import Resource, ResourceType
|
||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||
from llama_stack.schema_utils import json_schema_type, webmethod
|
||||
|
||||
|
||||
class CommonBenchmarkFields(BaseModel):
|
||||
"""
|
||||
:param dataset_id: The ID of the dataset to use to run the benchmark.
|
||||
:param scoring_functions: The scoring functions with parameters to use for this benchmark.
|
||||
:param metadata: Metadata for this benchmark for additional descriptions.
|
||||
"""
|
||||
|
||||
dataset_id: str
|
||||
scoring_functions: List[str]
|
||||
scoring_functions: List[ScoringFnParams]
|
||||
metadata: Dict[str, Any] = Field(
|
||||
default_factory=dict,
|
||||
description="Metadata for this evaluation task",
|
||||
description="Metadata for this benchmark",
|
||||
)
|
||||
|
||||
|
||||
|
@ -57,10 +64,17 @@ class Benchmarks(Protocol):
|
|||
@webmethod(route="/eval/benchmarks", method="POST")
|
||||
async def register_benchmark(
|
||||
self,
|
||||
benchmark_id: str,
|
||||
dataset_id: str,
|
||||
scoring_functions: List[str],
|
||||
provider_benchmark_id: Optional[str] = None,
|
||||
provider_id: Optional[str] = None,
|
||||
scoring_functions: List[ScoringFnParams],
|
||||
benchmark_id: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None: ...
|
||||
) -> Benchmark:
|
||||
"""
|
||||
Register a new benchmark.
|
||||
|
||||
:param dataset_id: The ID of the dataset to use to run the benchmark.
|
||||
:param scoring_functions: The scoring functions with parameters to use for this benchmark.
|
||||
:param benchmark_id: (Optional) The ID of the benchmark to register. If not provided, a random ID will be generated.
|
||||
:param metadata: (Optional) Metadata for this benchmark for additional descriptions.
|
||||
"""
|
||||
...
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue