mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-03 17:29:01 +00:00
fix: Update NVIDIA Eval README
This commit is contained in:
parent
4317a0ddcc
commit
f939117dbf
1 changed file with 39 additions and 33 deletions
|
@@ -18,9 +18,7 @@ POST /eval/benchmarks
|
||||||
"dataset_id": "",
|
"dataset_id": "",
|
||||||
"scoring_functions": [],
|
"scoring_functions": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"config": {
|
"type": "mmlu"
|
||||||
"type": "mmlu"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
@@ -36,31 +34,29 @@ POST /eval/benchmarks
|
||||||
"dataset_id": "",
|
"dataset_id": "",
|
||||||
"scoring_functions": [],
|
"scoring_functions": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"config": {
|
"type": "custom",
|
||||||
"type": "custom",
|
"params": {
|
||||||
"params": {
|
"parallelism": 8
|
||||||
"parallelism": 8
|
},
|
||||||
},
|
"tasks": {
|
||||||
"tasks": {
|
"qa": {
|
||||||
"qa": {
|
"type": "completion",
|
||||||
"type": "completion",
|
"params": {
|
||||||
"params": {
|
"template": {
|
||||||
"template": {
|
"prompt": "{{prompt}}",
|
||||||
"prompt": "{{prompt}}",
|
"max_tokens": 200
|
||||||
"max_tokens": 200
|
}
|
||||||
}
|
},
|
||||||
},
|
"dataset": {
|
||||||
"dataset": {
|
"files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
|
||||||
"files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
|
},
|
||||||
},
|
"metrics": {
|
||||||
"metrics": {
|
"bleu": {
|
||||||
"bleu": {
|
"type": "bleu",
|
||||||
"type": "bleu",
|
"params": {
|
||||||
"params": {
|
"references": [
|
||||||
"references": [
|
"{{ideal_response}}"
|
||||||
"{{ideal_response}}"
|
]
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -78,12 +74,16 @@ POST /eval/benchmarks/{benchmark_id}/jobs
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"benchmark_id": "my-custom-benchmark",
|
"benchmark_id": "my-custom-benchmark",
|
||||||
"task_config": {
|
"benchmark_config": {
|
||||||
"eval_candidate": {
|
"eval_candidate": {
|
||||||
"type": "model",
|
"type": "model",
|
||||||
"model": "meta/llama-3.1-8b-instruct"
|
"model": "meta/llama-3.1-8b-instruct",
|
||||||
|
"sampling_params": {
|
||||||
|
"max_tokens": 100,
|
||||||
|
"temperature": 0.7
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"scoring_params": []
|
"scoring_params": {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
@@ -91,7 +91,8 @@ POST /eval/benchmarks/{benchmark_id}/jobs
|
||||||
Response example:
|
Response example:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"job_id": "1234"
|
"job_id": "1234",
|
||||||
|
"status": "in_progress"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -100,9 +101,14 @@ Response example:
|
||||||
GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
|
GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Example for cancelling a job
|
||||||
|
```
|
||||||
|
POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
|
||||||
|
```
|
||||||
|
|
||||||
### Example for getting the results
|
### Example for getting the results
|
||||||
```
|
```
|
||||||
GET /eval/benchmarks/{benchmark_id}/result
|
GET /eval/benchmarks/{benchmark_id}/results
|
||||||
```
|
```
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue