forked from phoenix-oss/llama-stack-mirror
refactor(test): move tools, evals, datasetio, scoring and post training tests (#1401)
All of the tests from `llama_stack/providers/tests/` are now moved to `tests/integration`. I converted the `tools`, `scoring` and `datasetio` tests to use the API. However, `eval` and `post_training` proved to be a bit challenging, so I am leaving those as they are for now. I think `post_training` should be relatively straightforward also. As part of this, I noticed that the `wolfram_alpha` tool wasn't added to some of our commonly used distros so I added it. I am going to remove a lot of code duplication from distros next so while this looks like a one-off right now, it will go away and be there uniformly for all distros.
This commit is contained in:
parent
dd0db8038b
commit
abfbaf3c1b
51 changed files with 471 additions and 1245 deletions
5
tests/integration/datasetio/__init__.py
Normal file
5
tests/integration/datasetio/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
6
tests/integration/datasetio/test_dataset.csv
Normal file
6
tests/integration/datasetio/test_dataset.csv
Normal file
|
@ -0,0 +1,6 @@
|
|||
input_query,generated_answer,expected_answer,chat_completion_input
|
||||
What is the capital of France?,London,Paris,"[{'role': 'user', 'content': 'What is the capital of France?'}]"
|
||||
Who is the CEO of Meta?,Mark Zuckerberg,Mark Zuckerberg,"[{'role': 'user', 'content': 'Who is the CEO of Meta?'}]"
|
||||
What is the largest planet in our solar system?,Jupiter,Jupiter,"[{'role': 'user', 'content': 'What is the largest planet in our solar system?'}]"
|
||||
What is the smallest country in the world?,China,Vatican City,"[{'role': 'user', 'content': 'What is the smallest country in the world?'}]"
|
||||
What is the currency of Japan?,Yen,Yen,"[{'role': 'user', 'content': 'What is the currency of Japan?'}]"
|
|
118
tests/integration/datasetio/test_datasetio.py
Normal file
118
tests/integration/datasetio/test_datasetio.py
Normal file
|
@ -0,0 +1,118 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# How to run this test:
|
||||
#
|
||||
# pytest tests/integration/datasetio/test_datasetio.py
|
||||
# -m "meta_reference"
|
||||
# -v -s --tb=short --disable-warnings
|
||||
|
||||
|
||||
def data_url_from_file(file_path: str) -> str:
    """Encode the file at *file_path* as a base64 ``data:`` URL.

    The media type is guessed from the file name. When it cannot be guessed
    (unknown or missing extension) the generic ``application/octet-stream``
    is used, so the URL never carries the literal text ``None`` as its
    media type.

    Args:
        file_path: Path of the file to encode.

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``.

    Raises:
        FileNotFoundError: If *file_path* does not exist.
    """
    # Open directly instead of checking for existence first: one filesystem
    # access, and no window between the check and the read.
    try:
        with open(file_path, "rb") as file:
            file_content = file.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found: {file_path}") from err

    base64_content = base64.b64encode(file_content).decode("utf-8")

    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        # guess_type() returns None for extensions it does not know.
        mime_type = "application/octet-stream"

    return f"data:{mime_type};base64,{base64_content}"
|
||||
|
||||
|
||||
def register_dataset(llama_stack_client, for_generation=False, for_rag=False, dataset_id="test_dataset"):
    """Register one of the CSV fixtures next to this file as a dataset.

    Args:
        llama_stack_client: Client whose ``datasets.register`` is called.
        for_generation: Use the schema with a ``chat_completion_input``
            column instead of ``generated_answer``. Takes precedence over
            ``for_rag`` when choosing the schema.
        for_rag: Load ``test_rag_dataset.csv`` instead of
            ``test_dataset.csv`` and (unless ``for_generation`` is set) add
            a ``context`` column to the schema.
        dataset_id: Identifier to register the dataset under.
    """
    csv_name = "test_rag_dataset.csv" if for_rag else "test_dataset.csv"
    csv_path = Path(os.path.abspath(__file__)).parent / csv_name
    encoded_uri = data_url_from_file(str(csv_path))

    # Columns common to every schema variant.
    schema = {
        "expected_answer": {"type": "string"},
        "input_query": {"type": "string"},
    }
    if for_generation:
        schema["chat_completion_input"] = {"type": "chat_completion_input"}
    else:
        schema["generated_answer"] = {"type": "string"}
        if for_rag:
            schema["context"] = {"type": "string"}

    llama_stack_client.datasets.register(
        dataset_id=dataset_id,
        dataset_schema=schema,
        url={"uri": encoded_uri},
        provider_id="localfs",
    )
|
||||
|
||||
|
||||
def test_datasets_list(llama_stack_client):
    """Listing datasets returns an empty list when none are registered.

    NOTE: this only passes from a clean state, i.e. no dataset has been left
    registered on the stack the client talks to.
    """
    listed = llama_stack_client.datasets.list()

    assert isinstance(listed, list)
    assert not listed
|
||||
|
||||
|
||||
def test_register_dataset(llama_stack_client):
    """Exercise the register -> list -> unregister round trip for a dataset."""
    register_dataset(llama_stack_client)
    datasets_api = llama_stack_client.datasets

    # After registering, exactly the default fixture dataset is listed.
    registered = datasets_api.list()
    assert isinstance(registered, list)
    assert len(registered) == 1
    assert registered[0].identifier == "test_dataset"

    # Unregistering an identifier that was never registered must fail.
    with pytest.raises(ValueError):
        datasets_api.unregister("test_dataset2")

    # Unregistering the real dataset empties the listing again.
    datasets_api.unregister("test_dataset")
    remaining = datasets_api.list()
    assert isinstance(remaining, list)
    assert len(remaining) == 0

    # A second unregister of the same identifier must fail as well.
    with pytest.raises(ValueError):
        datasets_api.unregister("test_dataset")
|
||||
|
||||
|
||||
def test_get_rows_paginated(llama_stack_client):
    """Fetch the fixture dataset in two pages and check the page tokens.

    ``test_dataset.csv`` has five data rows: the first request asks for
    three, the second continues from the returned token and asks for two.
    The dataset is unregistered afterwards so that it does not leak into
    tests that expect an empty dataset listing.
    """
    register_dataset(llama_stack_client)
    try:
        # First page: three rows, token points at the fourth row.
        response = llama_stack_client.datasetio.get_rows_paginated(
            dataset_id="test_dataset",
            rows_in_page=3,
        )
        assert isinstance(response.rows, list)
        assert len(response.rows) == 3
        assert response.next_page_token == "3"

        # Second page: continue from the previous token for the last two rows.
        response = llama_stack_client.datasetio.get_rows_paginated(
            dataset_id="test_dataset",
            rows_in_page=2,
            page_token=response.next_page_token,
        )
        assert isinstance(response.rows, list)
        assert len(response.rows) == 2
        assert response.next_page_token == "5"
    finally:
        # Clean up even when an assertion fails; test_datasets_list relies on
        # no dataset being registered.
        llama_stack_client.datasets.unregister("test_dataset")
|
6
tests/integration/datasetio/test_rag_dataset.csv
Normal file
6
tests/integration/datasetio/test_rag_dataset.csv
Normal file
|
@ -0,0 +1,6 @@
|
|||
input_query,context,generated_answer,expected_answer
|
||||
What is the capital of France?,"France is a country in Western Europe with a population of about 67 million people. Its capital city has been a major European cultural center since the 17th century and is known for landmarks like the Eiffel Tower and the Louvre Museum.",London,Paris
|
||||
Who is the CEO of Meta?,"Meta Platforms, formerly known as Facebook, is one of the world's largest technology companies. Founded by Mark Zuckerberg in 2004, the company has expanded to include platforms like Instagram, WhatsApp, and virtual reality technologies.",Mark Zuckerberg,Mark Zuckerberg
|
||||
What is the largest planet in our solar system?,"The solar system consists of eight planets orbiting around the Sun. These planets, in order from the Sun, are Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, and Neptune. Gas giants are significantly larger than terrestrial planets.",Jupiter,Jupiter
|
||||
What is the smallest country in the world?,"Independent city-states and micronations are among the world's smallest sovereign territories. Some notable examples include Monaco, San Marino, and Vatican City, which is an enclave within Rome, Italy.",China,Vatican City
|
||||
What is the currency of Japan?,"Japan is an island country in East Asia with a rich cultural heritage and one of the world's largest economies. Its financial system has been established since the Meiji period, with its modern currency being introduced in 1871.",Yen,Yen
|
|
Loading…
Add table
Add a link
Reference in a new issue