datasetio

This commit is contained in:
Xi Yan 2025-03-12 16:47:22 -07:00
parent d695d26b77
commit 97d6b87e05
2 changed files with 28 additions and 17 deletions

View file

@@ -9,10 +9,23 @@ import mimetypes
import os
from pathlib import Path
import pytest
# How to run this test:
#
# LLAMA_STACK_CONFIG="template-name" pytest -v tests/integration/datasetio
@pytest.fixture
def test_dataset(llama_stack_client):
register_dataset(llama_stack_client)
yield # This is where the test function will run
# Teardown - this always runs, even if the test fails
try:
llama_stack_client.datasets.unregister("test_dataset")
except Exception as e:
print(f"Warning: Failed to unregister test_dataset: {e}")
def data_url_from_file(file_path: str) -> str:
if not os.path.exists(file_path):
@@ -80,8 +93,7 @@ def test_register_unregister_dataset(llama_stack_client):
assert len(response) == 0
def test_get_rows_paginated(llama_stack_client):
register_dataset(llama_stack_client)
def test_get_rows_paginated(llama_stack_client, test_dataset):
response = llama_stack_client.datasetio.get_rows_paginated(
dataset_id="test_dataset",
rows_in_page=3,
@@ -99,4 +111,3 @@ def test_get_rows_paginated(llama_stack_client):
assert isinstance(response.rows, list)
assert len(response.rows) == 2
assert response.next_page_token == "5"
llama_stack_client.datasets.unregister("test_dataset")

View file

@@ -9,6 +9,17 @@ import pytest
from ..datasetio.test_datasetio import register_dataset
@pytest.fixture
def test_dataset_rag(llama_stack_client):
register_dataset(llama_stack_client, for_rag=True)
yield # This is where the test function will run
# Teardown - this always runs, even if the test fails
try:
llama_stack_client.datasets.unregister("test_dataset")
except Exception as e:
print(f"Warning: Failed to unregister test_dataset: {e}")
@pytest.fixture
def sample_judge_prompt_template():
@@ -79,9 +90,7 @@ def test_scoring_functions_register(
# TODO: add unregister api for scoring functions
def test_scoring_score(llama_stack_client):
register_dataset(llama_stack_client, for_rag=True)
def test_scoring_score(llama_stack_client, test_dataset_rag):
# scoring individual rows
rows = llama_stack_client.datasetio.get_rows_paginated(
dataset_id="test_dataset",
@@ -114,12 +123,8 @@ def test_scoring_score(llama_stack_client):
assert x in response.results
assert len(response.results[x].score_rows) == 5
llama_stack_client.datasets.unregister("test_dataset")
def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge_prompt_template, judge_model_id):
register_dataset(llama_stack_client, for_rag=True)
def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge_prompt_template, judge_model_id, test_dataset_rag):
# scoring individual rows
rows = llama_stack_client.datasetio.get_rows_paginated(
dataset_id="test_dataset",
@@ -159,8 +164,6 @@ def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge
assert x in response.results
assert len(response.results[x].score_rows) == 5
llama_stack_client.datasets.unregister("test_dataset")
@pytest.mark.parametrize(
"provider_id",
@@ -171,9 +174,8 @@ def test_scoring_score_with_params_llm_as_judge(llama_stack_client, sample_judge
],
)
def test_scoring_score_with_aggregation_functions(
llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id
llama_stack_client, sample_judge_prompt_template, judge_model_id, provider_id, test_dataset_rag
):
register_dataset(llama_stack_client, for_rag=True)
rows = llama_stack_client.datasetio.get_rows_paginated(
dataset_id="test_dataset",
rows_in_page=3,
@@ -227,5 +229,3 @@ def test_scoring_score_with_aggregation_functions(
assert x in response.results
assert len(response.results[x].score_rows) == len(rows.rows)
assert len(response.results[x].aggregated_results) == len(aggr_fns)
llama_stack_client.datasets.unregister("test_dataset")