mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-10 04:08:31 +00:00
datasetio
This commit is contained in:
parent
d695d26b77
commit
97d6b87e05
2 changed files with 28 additions and 17 deletions
|
@ -9,10 +9,23 @@ import mimetypes
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
# How to run this test:
|
# How to run this test:
|
||||||
#
|
#
|
||||||
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasetio
|
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasetio
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_dataset(llama_stack_client):
|
||||||
|
register_dataset(llama_stack_client)
|
||||||
|
yield # This is where the test function will run
|
||||||
|
|
||||||
|
# Teardown - this always runs, even if the test fails
|
||||||
|
try:
|
||||||
|
llama_stack_client.datasets.unregister("test_dataset")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to unregister test_dataset: {e}")
|
||||||
|
|
||||||
|
|
||||||
def data_url_from_file(file_path: str) -> str:
|
def data_url_from_file(file_path: str) -> str:
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
|
@ -80,8 +93,7 @@ def test_register_unregister_dataset(llama_stack_client):
|
||||||
assert len(response) == 0
|
assert len(response) == 0
|
||||||
|
|
||||||
|
|
||||||
def test_get_rows_paginated(llama_stack_client):
|
def test_get_rows_paginated(llama_stack_client, test_dataset):
|
||||||
register_dataset(llama_stack_client)
|
|
||||||
response = llama_stack_client.datasetio.get_rows_paginated(
|
response = llama_stack_client.datasetio.get_rows_paginated(
|
||||||
dataset_id="test_dataset",
|
dataset_id="test_dataset",
|
||||||
rows_in_page=3,
|
rows_in_page=3,
|
||||||
|
@ -99,4 +111,3 @@ def test_get_rows_paginated(llama_stack_client):
|
||||||
assert isinstance(response.rows, list)
|
assert isinstance(response.rows, list)
|
||||||
assert len(response.rows) == 2
|
assert len(response.rows) == 2
|
||||||
assert response.next_page_token == "5"
|
assert response.next_page_token == "5"
|
||||||
llama_stack_client.datasets.unregister("test_dataset")
|
|
||||||
|
|
|
@ -9,6 +9,17 @@ import pytest
|
||||||
|
|
||||||
from ..datasetio.test_datasetio import register_dataset
|
from ..datasetio.test_datasetio import register_dataset
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_dataset_rag(llama_stack_client):
|
||||||
|
register_dataset(llama_stack_client, for_rag=True)
|
||||||
|
yield # This is where the test function will run
|
||||||
|
|
||||||
|
# Teardown - this always runs, even if the test fails
|
||||||
|
try:
|
||||||
|
llama_stack_client.datasets.unregister("test_dataset")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to unregister test_dataset: {e}")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_judge_prompt_template():
|
def sample_judge_prompt_template():
|
||||||
|
@ -79,9 +90,7 @@ def test_scoring_functions_register(
|
||||||
# TODO: add unregister api for scoring functions
|
# TODO: add unregister api for scoring functions
|
||||||
|
|
||||||
|
|
||||||
def test_scoring_score(llama_stack_client):
|
def test_scoring_score(llama_stack_client, test_dataset_rag):
|
||||||
register_dataset(llama_stack_client, for_rag=True)
|
|
||||||
|
|
||||||
# scoring individual rows
|
# scoring individual rows
|
||||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||||
dataset_id="test_dataset",
|
dataset_id="test_dataset",
|
||||||
|
@ -114,12 +123,8 @@ def test_scoring_score(llama_stack_client):
|
||||||
assert x in response.results
|
assert x in response.results
|
||||||
assert len(response.results[x].score_rows) == 5
|
assert len(response.results[x].score_rows) == 5
|
||||||
|
|
||||||
llama_stack_client.datasets.unregister("test_dataset")
|
|
||||||
|
|
||||||
|
|
||||||
def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge_prompt_template, judge_model_id):
|
|
||||||
register_dataset(llama_stack_client, for_rag=True)
|
|
||||||
|
|
||||||
|
def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge_prompt_template, judge_model_id, test_dataset_rag):
|
||||||
# scoring individual rows
|
# scoring individual rows
|
||||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||||
dataset_id="test_dataset",
|
dataset_id="test_dataset",
|
||||||
|
@ -159,8 +164,6 @@ def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge
|
||||||
assert x in response.results
|
assert x in response.results
|
||||||
assert len(response.results[x].score_rows) == 5
|
assert len(response.results[x].score_rows) == 5
|
||||||
|
|
||||||
llama_stack_client.datasets.unregister("test_dataset")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"provider_id",
|
"provider_id",
|
||||||
|
@ -171,9 +174,8 @@ def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_scoring_score_with_aggregation_functions(
|
def test_scoring_score_with_aggregation_functions(
|
||||||
llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id
|
llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id, test_dataset_rag
|
||||||
):
|
):
|
||||||
register_dataset(llama_stack_client, for_rag=True)
|
|
||||||
rows = llama_stack_client.datasetio.get_rows_paginated(
|
rows = llama_stack_client.datasetio.get_rows_paginated(
|
||||||
dataset_id="test_dataset",
|
dataset_id="test_dataset",
|
||||||
rows_in_page=3,
|
rows_in_page=3,
|
||||||
|
@ -227,5 +229,3 @@ def test_scoring_score_with_aggregation_functions(
|
||||||
assert x in response.results
|
assert x in response.results
|
||||||
assert len(response.results[x].score_rows) == len(rows.rows)
|
assert len(response.results[x].score_rows) == len(rows.rows)
|
||||||
assert len(response.results[x].aggregated_results) == len(aggr_fns)
|
assert len(response.results[x].aggregated_results) == len(aggr_fns)
|
||||||
|
|
||||||
llama_stack_client.datasets.unregister("test_dataset")
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue