mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-08 03:00:56 +00:00
chore(misc): update datasets to use the beta prefix and benchmarks to use the alpha prefix (#3891)
This will land together with https://github.com/llamastack/llama-stack-client-python/pull/282 (hence CI will be red on this one). I have verified locally that the tests pass with the updated version of the client SDK.
This commit is contained in:
parent
7918188f1e
commit
0e57233a0a
3 changed files with 15 additions and 13 deletions
|
|
@ -174,7 +174,9 @@ class StackApp(FastAPI):
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: StackApp):
|
async def lifespan(app: StackApp):
|
||||||
logger.info("Starting up")
|
server_version = parse_version("llama-stack")
|
||||||
|
|
||||||
|
logger.info(f"Starting up Llama Stack server (version: {server_version})")
|
||||||
assert app.stack is not None
|
assert app.stack is not None
|
||||||
app.stack.create_registry_refresh_task()
|
app.stack.create_registry_refresh_task()
|
||||||
yield
|
yield
|
||||||
|
|
|
||||||
|
|
@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str:
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
|
def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
|
||||||
dataset = llama_stack_client.datasets.register(
|
dataset = llama_stack_client.beta.datasets.register(
|
||||||
purpose=purpose,
|
purpose=purpose,
|
||||||
source=source,
|
source=source,
|
||||||
)
|
)
|
||||||
assert dataset.identifier is not None
|
assert dataset.identifier is not None
|
||||||
assert dataset.provider_id == provider_id
|
assert dataset.provider_id == provider_id
|
||||||
iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
|
iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit)
|
||||||
assert len(iterrow_response.data) == limit
|
assert len(iterrow_response.data) == limit
|
||||||
|
|
||||||
dataset_list = llama_stack_client.datasets.list()
|
dataset_list = llama_stack_client.beta.datasets.list()
|
||||||
assert dataset.identifier in [d.identifier for d in dataset_list]
|
assert dataset.identifier in [d.identifier for d in dataset_list]
|
||||||
|
|
||||||
llama_stack_client.datasets.unregister(dataset.identifier)
|
llama_stack_client.beta.datasets.unregister(dataset.identifier)
|
||||||
dataset_list = llama_stack_client.datasets.list()
|
dataset_list = llama_stack_client.beta.datasets.list()
|
||||||
assert dataset.identifier not in [d.identifier for d in dataset_list]
|
assert dataset.identifier not in [d.identifier for d in dataset_list]
|
||||||
|
|
|
||||||
|
|
@ -17,17 +17,17 @@ from ..datasets.test_datasets import data_url_from_file
|
||||||
|
|
||||||
@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
|
@pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
|
||||||
def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
||||||
dataset = llama_stack_client.datasets.register(
|
dataset = llama_stack_client.beta.datasets.register(
|
||||||
purpose="eval/messages-answer",
|
purpose="eval/messages-answer",
|
||||||
source={
|
source={
|
||||||
"type": "uri",
|
"type": "uri",
|
||||||
"uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
|
"uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
response = llama_stack_client.datasets.list()
|
response = llama_stack_client.beta.datasets.list()
|
||||||
assert any(x.identifier == dataset.identifier for x in response)
|
assert any(x.identifier == dataset.identifier for x in response)
|
||||||
|
|
||||||
rows = llama_stack_client.datasets.iterrows(
|
rows = llama_stack_client.beta.datasets.iterrows(
|
||||||
dataset_id=dataset.identifier,
|
dataset_id=dataset.identifier,
|
||||||
limit=3,
|
limit=3,
|
||||||
)
|
)
|
||||||
|
|
@ -37,12 +37,12 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
||||||
scoring_fn_id,
|
scoring_fn_id,
|
||||||
]
|
]
|
||||||
benchmark_id = str(uuid.uuid4())
|
benchmark_id = str(uuid.uuid4())
|
||||||
llama_stack_client.benchmarks.register(
|
llama_stack_client.alpha.benchmarks.register(
|
||||||
benchmark_id=benchmark_id,
|
benchmark_id=benchmark_id,
|
||||||
dataset_id=dataset.identifier,
|
dataset_id=dataset.identifier,
|
||||||
scoring_functions=scoring_functions,
|
scoring_functions=scoring_functions,
|
||||||
)
|
)
|
||||||
list_benchmarks = llama_stack_client.benchmarks.list()
|
list_benchmarks = llama_stack_client.alpha.benchmarks.list()
|
||||||
assert any(x.identifier == benchmark_id for x in list_benchmarks)
|
assert any(x.identifier == benchmark_id for x in list_benchmarks)
|
||||||
|
|
||||||
response = llama_stack_client.alpha.eval.evaluate_rows(
|
response = llama_stack_client.alpha.eval.evaluate_rows(
|
||||||
|
|
@ -66,7 +66,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
|
||||||
|
|
||||||
@pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
|
@pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
|
||||||
def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
|
def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
|
||||||
dataset = llama_stack_client.datasets.register(
|
dataset = llama_stack_client.beta.datasets.register(
|
||||||
purpose="eval/messages-answer",
|
purpose="eval/messages-answer",
|
||||||
source={
|
source={
|
||||||
"type": "uri",
|
"type": "uri",
|
||||||
|
|
@ -74,7 +74,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
benchmark_id = str(uuid.uuid4())
|
benchmark_id = str(uuid.uuid4())
|
||||||
llama_stack_client.benchmarks.register(
|
llama_stack_client.alpha.benchmarks.register(
|
||||||
benchmark_id=benchmark_id,
|
benchmark_id=benchmark_id,
|
||||||
dataset_id=dataset.identifier,
|
dataset_id=dataset.identifier,
|
||||||
scoring_functions=[scoring_fn_id],
|
scoring_functions=[scoring_fn_id],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue