Merge branch 'main' into content-extension

This commit is contained in:
Francisco Arceo 2025-08-13 14:04:47 -06:00 committed by GitHub
commit 84a26339c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
73 changed files with 2416 additions and 506 deletions

View file

@ -270,7 +270,7 @@ def openai_client(client_with_models):
@pytest.fixture(params=["openai_client", "client_with_models"])
def compat_client(request, client_with_models):
if isinstance(client_with_models, LlamaStackAsLibraryClient):
if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
# OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
# to go via the Stack library client (which itself rewrites requests to be served inline),
# we cannot do this.

View file

@ -137,7 +137,7 @@ test_response_multi_turn_tool_execution:
server_url: "<FILLED_BY_TEST_RUNNER>"
output: "yes"
- case_id: "experiment_results_lookup"
input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius."
tools:
- type: mcp
server_label: "localmcp"
@ -149,7 +149,7 @@ test_response_multi_turn_tool_execution_streaming:
test_params:
case:
- case_id: "user_permissions_workflow"
input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
tools:
- type: mcp
server_label: "localmcp"
@ -157,7 +157,7 @@ test_response_multi_turn_tool_execution_streaming:
stream: true
output: "no"
- case_id: "experiment_analysis_streaming"
input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process."
tools:
- type: mcp
server_label: "localmcp"

View file

@ -363,6 +363,9 @@ def test_response_non_streaming_file_search_empty_vector_store(request, compat_c
ids=case_id_generator,
)
def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case):
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server() as mcp_server_info:
tools = case["tools"]
for tool in tools:
@ -381,12 +384,18 @@ def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id,
assert list_tools.type == "mcp_list_tools"
assert list_tools.server_label == "localmcp"
assert len(list_tools.tools) == 2
assert {t.name for t in list_tools.tools} == {"get_boiling_point", "greet_everyone"}
assert {t.name for t in list_tools.tools} == {
"get_boiling_point",
"greet_everyone",
}
call = response.output[1]
assert call.type == "mcp_call"
assert call.name == "get_boiling_point"
assert json.loads(call.arguments) == {"liquid_name": "myawesomeliquid", "celsius": True}
assert json.loads(call.arguments) == {
"liquid_name": "myawesomeliquid",
"celsius": True,
}
assert call.error is None
assert "-100" in call.output
@ -485,8 +494,11 @@ def test_response_non_streaming_multi_turn_image(request, compat_client, text_mo
responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"],
ids=case_id_generator,
)
def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
"""Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
tools = case["tools"]
# Replace the placeholder URL with the actual server URL
@ -541,8 +553,11 @@ def test_response_non_streaming_multi_turn_tool_execution(request, compat_client
responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"],
ids=case_id_generator,
)
async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
"""Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
if not isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("in-process MCP server is only supported in library client")
with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
tools = case["tools"]
# Replace the placeholder URL with the actual server URL
@ -572,6 +587,105 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
f"Last chunk should be response.completed, got {chunks[-1].type}"
)
# Verify tool call streaming events are present
chunk_types = [chunk.type for chunk in chunks]
# Should have function call arguments delta events for tool calls
delta_events = [chunk for chunk in chunks if chunk.type == "response.function_call_arguments.delta"]
done_events = [chunk for chunk in chunks if chunk.type == "response.function_call_arguments.done"]
# Should have output item events for tool calls
item_added_events = [chunk for chunk in chunks if chunk.type == "response.output_item.added"]
item_done_events = [chunk for chunk in chunks if chunk.type == "response.output_item.done"]
# Verify we have substantial streaming activity (not just batch events)
assert len(chunks) > 10, f"Expected rich streaming with many events, got only {len(chunks)} chunks"
# Since this test involves MCP tool calls, we should see streaming events
assert len(delta_events) > 0, f"Expected function_call_arguments.delta events, got chunk types: {chunk_types}"
assert len(done_events) > 0, f"Expected function_call_arguments.done events, got chunk types: {chunk_types}"
# Should have output item events for function calls
assert len(item_added_events) > 0, f"Expected response.output_item.added events, got chunk types: {chunk_types}"
assert len(item_done_events) > 0, f"Expected response.output_item.done events, got chunk types: {chunk_types}"
# Verify delta events have proper structure
for delta_event in delta_events:
assert hasattr(delta_event, "delta"), "Delta event should have 'delta' field"
assert hasattr(delta_event, "item_id"), "Delta event should have 'item_id' field"
assert hasattr(delta_event, "sequence_number"), "Delta event should have 'sequence_number' field"
assert delta_event.delta, "Delta should not be empty"
# Verify done events have proper structure
for done_event in done_events:
assert hasattr(done_event, "arguments"), "Done event should have 'arguments' field"
assert hasattr(done_event, "item_id"), "Done event should have 'item_id' field"
assert done_event.arguments, "Final arguments should not be empty"
# Verify output item added events have proper structure
for added_event in item_added_events:
assert hasattr(added_event, "item"), "Added event should have 'item' field"
assert hasattr(added_event, "output_index"), "Added event should have 'output_index' field"
assert hasattr(added_event, "sequence_number"), "Added event should have 'sequence_number' field"
assert hasattr(added_event, "response_id"), "Added event should have 'response_id' field"
assert added_event.item.type in ["function_call", "mcp_call"], "Added item should be a tool call"
assert added_event.item.status == "in_progress", "Added item should be in progress"
assert added_event.response_id, "Response ID should not be empty"
assert isinstance(added_event.output_index, int), "Output index should be integer"
assert added_event.output_index >= 0, "Output index should be non-negative"
# Verify output item done events have proper structure
for done_event in item_done_events:
assert hasattr(done_event, "item"), "Done event should have 'item' field"
assert hasattr(done_event, "output_index"), "Done event should have 'output_index' field"
assert hasattr(done_event, "sequence_number"), "Done event should have 'sequence_number' field"
assert hasattr(done_event, "response_id"), "Done event should have 'response_id' field"
assert done_event.item.type in ["function_call", "mcp_call"], "Done item should be a tool call"
# Note: MCP calls don't have a status field, only function calls do
if done_event.item.type == "function_call":
assert done_event.item.status == "completed", "Function call should be completed"
assert done_event.response_id, "Response ID should not be empty"
assert isinstance(done_event.output_index, int), "Output index should be integer"
assert done_event.output_index >= 0, "Output index should be non-negative"
# Group function call argument events by item_id (these should have proper tracking)
function_call_events_by_item_id = {}
for chunk in chunks:
if hasattr(chunk, "item_id") and chunk.type in [
"response.function_call_arguments.delta",
"response.function_call_arguments.done",
]:
item_id = chunk.item_id
if item_id not in function_call_events_by_item_id:
function_call_events_by_item_id[item_id] = []
function_call_events_by_item_id[item_id].append(chunk)
for item_id, related_events in function_call_events_by_item_id.items():
# Should have at least one delta and one done event for a complete function call
delta_events = [e for e in related_events if e.type == "response.function_call_arguments.delta"]
done_events = [e for e in related_events if e.type == "response.function_call_arguments.done"]
assert len(delta_events) > 0, f"Item {item_id} should have at least one delta event"
assert len(done_events) == 1, f"Item {item_id} should have exactly one done event"
# Verify all events have the same item_id
for event in related_events:
assert event.item_id == item_id, f"Event should have consistent item_id {item_id}, got {event.item_id}"
# Basic pairing check: each output_item.added should be followed by some activity
# (but we can't enforce strict 1:1 pairing due to the complexity of multi-turn scenarios)
assert len(item_added_events) > 0, "Should have at least one output_item.added event"
# Verify response_id consistency across all events
response_ids = set()
for chunk in chunks:
if hasattr(chunk, "response_id"):
response_ids.add(chunk.response_id)
elif hasattr(chunk, "response") and hasattr(chunk.response, "id"):
response_ids.add(chunk.response.id)
assert len(response_ids) == 1, f"All events should reference the same response_id, found: {response_ids}"
# Get the final response from the last chunk
final_chunk = chunks[-1]
if hasattr(final_chunk, "response"):
@ -634,7 +748,7 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
},
],
)
def test_response_text_format(request, compat_client, text_model_id, text_format):
def test_response_text_format(compat_client, text_model_id, text_format):
if isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("Responses API text format is not yet supported in library client.")
@ -653,7 +767,7 @@ def test_response_text_format(request, compat_client, text_model_id, text_format
@pytest.fixture
def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory):
def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
"""Create a vector store with multiple files that have different attributes for filtering tests."""
if isinstance(compat_client, LlamaStackAsLibraryClient):
pytest.skip("Responses API file search is not yet supported in library client.")
@ -713,7 +827,9 @@ def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_
# Attach file to vector store with attributes
file_attach_response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id, file_id=file_response.id, attributes=file_data["attributes"]
vector_store_id=vector_store.id,
file_id=file_response.id,
attributes=file_data["attributes"],
)
# Wait for attachment

View file

@ -10,10 +10,11 @@ import uuid
from io import BytesIO
import pytest
from llama_stack_client import BadRequestError, LlamaStackClient
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
logger = logging.getLogger(__name__)
@ -476,9 +477,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -527,9 +525,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
"""Test OpenAI vector store attach files on creation."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create some files and attach them to the vector store
@ -583,9 +578,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
"""Test OpenAI vector store list files."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -598,16 +590,20 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
file_buffer.name = f"openai_test_{i}.txt"
file = compat_client.files.create(file=file_buffer, purpose="assistants")
compat_client.vector_stores.files.create(
response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
file_id=file.id,
)
assert response is not None
assert response.status == "completed", (
f"Failed to attach file {file.id} to vector store {vector_store.id}: {response=}"
)
file_ids.append(file.id)
files_list = compat_client.vector_stores.files.list(vector_store_id=vector_store.id)
assert files_list
assert files_list.object == "list"
assert files_list.data
assert files_list.data is not None
assert not files_list.has_more
assert len(files_list.data) == 3
assert set(file_ids) == {file.id for file in files_list.data}
@ -643,12 +639,13 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
"""Test OpenAI vector store list files with invalid vector store ID."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
if isinstance(compat_client, LlamaStackAsLibraryClient):
errors = ValueError
else:
errors = (BadRequestError, OpenAIBadRequestError)
with pytest.raises((BadRequestError, OpenAIBadRequestError)):
with pytest.raises(errors):
compat_client.vector_stores.files.list(vector_store_id="abc123")
@ -656,9 +653,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
"""Test OpenAI vector store retrieve file contents."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -686,9 +680,15 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
file_id=file.id,
)
assert file_contents
assert file_contents.content[0]["type"] == "text"
assert file_contents.content[0]["text"] == test_content.decode("utf-8")
assert file_contents is not None
assert len(file_contents.content) == 1
content = file_contents.content[0]
# llama-stack-client returns a model object, whereas openai-python returns a plain dict; normalize to a dict before asserting
if not isinstance(content, dict):
content = content.model_dump()
assert content["type"] == "text"
assert content["text"] == test_content.decode("utf-8")
assert file_contents.filename == file_name
assert file_contents.attributes == attributes
@ -697,9 +697,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store delete file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -752,9 +749,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
"""Test OpenAI vector store delete file removes from vector store."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -793,9 +787,6 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
"""Test OpenAI vector store update file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store
@ -841,9 +832,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
if isinstance(compat_client_with_empty_stores, LlamaStackClient):
pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
compat_client = compat_client_with_empty_stores
# Create a vector store with files