Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-05 10:23:44 +00:00.

Merge 5c04d2e0a7 into sapling-pr-archive-ehhuang

This commit is contained in:
commit fbd68f9588

8 changed files with 189 additions and 129 deletions
@@ -78,18 +78,18 @@ def data_url_from_file(file_path: str) -> str:
     ],
 )
 def test_register_and_iterrows(llama_stack_client, purpose, source, provider_id, limit):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose=purpose,
         source=source,
     )
     assert dataset.identifier is not None
     assert dataset.provider_id == provider_id
-    iterrow_response = llama_stack_client.datasets.iterrows(dataset.identifier, limit=limit)
+    iterrow_response = llama_stack_client.beta.datasets.iterrows(dataset.identifier, limit=limit)
     assert len(iterrow_response.data) == limit

-    dataset_list = llama_stack_client.datasets.list()
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier in [d.identifier for d in dataset_list]

-    llama_stack_client.datasets.unregister(dataset.identifier)
-    dataset_list = llama_stack_client.datasets.list()
+    llama_stack_client.beta.datasets.unregister(dataset.identifier)
+    dataset_list = llama_stack_client.beta.datasets.list()
     assert dataset.identifier not in [d.identifier for d in dataset_list]

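Note: the hunk above moves every dataset call under the client's beta namespace. A minimal standalone sketch of the same lifecycle, assuming a reachable Llama Stack server; the base URL and source URI below are placeholders, not values from this diff:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

dataset = client.beta.datasets.register(
    purpose="eval/messages-answer",
    source={"type": "uri", "uri": "https://example.com/data.csv"},  # placeholder
)

# iterrows returns a page of at most `limit` rows in .data
rows = client.beta.datasets.iterrows(dataset.identifier, limit=10)
assert len(rows.data) <= 10

# unregister removes the dataset from subsequent list() results
client.beta.datasets.unregister(dataset.identifier)
assert dataset.identifier not in [d.identifier for d in client.beta.datasets.list()]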
@@ -17,17 +17,17 @@ from ..datasets.test_datasets import data_url_from_file

 @pytest.mark.parametrize("scoring_fn_id", ["basic::equality"])
 def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
             "uri": data_url_from_file(Path(__file__).parent.parent / "datasets" / "test_dataset.csv"),
         },
     )
-    response = llama_stack_client.datasets.list()
+    response = llama_stack_client.beta.datasets.list()
     assert any(x.identifier == dataset.identifier for x in response)

-    rows = llama_stack_client.datasets.iterrows(
+    rows = llama_stack_client.beta.datasets.iterrows(
         dataset_id=dataset.identifier,
         limit=3,
     )
@@ -37,12 +37,12 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):
         scoring_fn_id,
     ]
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=scoring_functions,
     )
-    list_benchmarks = llama_stack_client.benchmarks.list()
+    list_benchmarks = llama_stack_client.alpha.benchmarks.list()
     assert any(x.identifier == benchmark_id for x in list_benchmarks)

     response = llama_stack_client.alpha.eval.evaluate_rows(
@@ -66,7 +66,7 @@ def test_evaluate_rows(llama_stack_client, text_model_id, scoring_fn_id):

 @pytest.mark.parametrize("scoring_fn_id", ["basic::subset_of"])
 def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
-    dataset = llama_stack_client.datasets.register(
+    dataset = llama_stack_client.beta.datasets.register(
         purpose="eval/messages-answer",
         source={
             "type": "uri",
@@ -74,7 +74,7 @@ def test_evaluate_benchmark(llama_stack_client, text_model_id, scoring_fn_id):
         },
     )
     benchmark_id = str(uuid.uuid4())
-    llama_stack_client.benchmarks.register(
+    llama_stack_client.alpha.benchmarks.register(
         benchmark_id=benchmark_id,
         dataset_id=dataset.identifier,
         scoring_functions=[scoring_fn_id],
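Note: benchmarks move to the alpha namespace in the same way. A short sketch combining the two hunks above, reusing the client and dataset from the previous sketch; only calls that appear in this diff are used:

import uuid

benchmark_id = str(uuid.uuid4())
client.alpha.benchmarks.register(
    benchmark_id=benchmark_id,
    dataset_id=dataset.identifier,
    scoring_functions=["basic::equality"],
)
assert any(b.identifier == benchmark_id for b in client.alpha.benchmarks.list())

# Evaluation is then driven through client.alpha.eval.evaluate_rows(...);
# its remaining keyword arguments are truncated in this diff, so they are
# not reproduced here.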
@@ -5,6 +5,7 @@
 # the root directory of this source tree.

 import json
+import logging  # allow-direct-logging
 import os

 import httpx
@@ -198,7 +199,7 @@ def test_response_sequential_file_search(


 @pytest.mark.parametrize("case", mcp_tool_test_cases)
-def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
+def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case, caplog):
     if not isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("in-process MCP server is only supported in library client")

@@ -245,13 +246,17 @@ def test_response_non_streaming_mcp_tool(compat_client, text_model_id, case):
         if isinstance(compat_client, LlamaStackAsLibraryClient)
         else (httpx.HTTPStatusError, openai.AuthenticationError)
     )
-    with pytest.raises(exc_type):
-        compat_client.responses.create(
-            model=text_model_id,
-            input=case.input,
-            tools=tools,
-            stream=False,
-        )
+    # Suppress expected auth error logs only for the failing auth attempt
+    with caplog.at_level(
+        logging.CRITICAL, logger="llama_stack.providers.inline.agents.meta_reference.responses.streaming"
+    ):
+        with pytest.raises(exc_type):
+            compat_client.responses.create(
+                model=text_model_id,
+                input=case.input,
+                tools=tools,
+                stream=False,
+            )

     for tool in tools:
         if tool["type"] == "mcp":
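Note: the suppression pattern added above is pytest's built-in caplog fixture. A self-contained sketch of the same technique; the logger name and failing function are illustrative, not taken from llama-stack:

import logging

import pytest

logger = logging.getLogger("example.noisy.module")  # placeholder logger name


def failing_call():
    logger.error("expected auth failure")  # noisy log to suppress
    raise PermissionError("bad token")


def test_expected_failure_is_quiet(caplog):
    # Raising the threshold to CRITICAL for this one logger keeps the
    # expected ERROR record out of the captured output while the block runs.
    with caplog.at_level(logging.CRITICAL, logger="example.noisy.module"):
        with pytest.raises(PermissionError):
            failing_call()
    assert all(r.levelno >= logging.CRITICAL for r in caplog.records)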