From 1c2ece229cd4271be5270c99cf7d40d8cd4ebd03 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 12 Aug 2025 11:12:01 -0700
Subject: [PATCH] chore(tests): fix responses and vector_io tests

---
 llama_stack/core/routers/inference.py          |  3 +-
 llama_stack/core/routing_tables/models.py      |  2 +
 llama_stack/log.py                             |  1 +
 .../models/llama/llama3/chat_format.py         |  1 +
 .../agents/meta_reference/openai_responses.py  |  4 ++
 .../remote/inference/fireworks/fireworks.py    |  3 ++
 .../utils/memory/openai_vector_store_mixin.py  |  2 +-
 tests/common/mcp.py                            | 12 ++----
 tests/integration/fixtures/common.py           |  2 +-
 .../fixtures/test_cases/responses.yaml         |  6 +--
 .../non_ci/responses/test_responses.py         | 17 +++++++--
 .../vector_io/test_openai_vector_stores.py     | 37 ++++---------------
 12 files changed, 41 insertions(+), 49 deletions(-)

diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
index 52581cc9d..6a3f07247 100644
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -65,7 +65,7 @@ from llama_stack.providers.datatypes import HealthResponse, HealthStatus, Routin
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack.providers.utils.telemetry.tracing import get_current_span
 
-logger = get_logger(name=__name__, category="core")
+logger = get_logger(name=__name__, category="inference")
 
 
 class InferenceRouter(Inference):
@@ -854,4 +854,5 @@ class InferenceRouter(Inference):
             model=model.identifier,
             object="chat.completion",
         )
+        logger.debug(f"InferenceRouter.completion_response: {final_response}")
         await self.store.store_chat_completion(final_response, messages)
diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py
index c76619271..34c431e00 100644
--- a/llama_stack/core/routing_tables/models.py
+++ b/llama_stack/core/routing_tables/models.py
@@ -63,6 +63,8 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
 
     async def get_provider_impl(self, model_id: str) -> Any:
         model = await lookup_model(self, model_id)
+        if model.provider_id not in self.impls_by_provider_id:
+            raise ValueError(f"Provider {model.provider_id} not found in the routing table")
         return self.impls_by_provider_id[model.provider_id]
 
     async def register_model(
diff --git a/llama_stack/log.py b/llama_stack/log.py
index 0a2d63ef6..7507aface 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -32,6 +32,7 @@ CATEGORIES = [
     "tools",
     "client",
     "telemetry",
+    "openai_responses",
 ]
 
 # Initialize category levels with default level
diff --git a/llama_stack/models/llama/llama3/chat_format.py b/llama_stack/models/llama/llama3/chat_format.py
index 0a973cf0c..1f88a1699 100644
--- a/llama_stack/models/llama/llama3/chat_format.py
+++ b/llama_stack/models/llama/llama3/chat_format.py
@@ -236,6 +236,7 @@ class ChatFormat:
                     arguments_json=json.dumps(tool_arguments),
                 )
             )
+            content = ""
 
         return RawMessage(
             role="assistant",
diff --git a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
index b98ca114f..347954908 100644
--- a/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
+++ b/llama_stack/providers/inline/agents/meta_reference/openai_responses.py
@@ -488,8 +488,12 @@ class OpenAIResponsesImpl:
         # Convert collected chunks to complete response
         if chat_response_tool_calls:
             tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
+
+            # when there are tool calls, we need to clear the content
+            chat_response_content = []
         else:
             tool_calls = None
+
         assistant_message = OpenAIAssistantMessageParam(
             content="".join(chat_response_content),
             tool_calls=tool_calls,
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py
index ca4c7b578..bd86f7238 100644
--- a/llama_stack/providers/remote/inference/fireworks/fireworks.py
+++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py
@@ -235,6 +235,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
         llama_model = self.get_llama_model(request.model)
         if isinstance(request, ChatCompletionRequest):
+            # TODO: tools are never added to the request, so we need to add them here
             if media_present or not llama_model:
                 input_dict["messages"] = [
                     await convert_message_to_openai_dict(m, download=True) for m in request.messages
                 ]
@@ -378,6 +379,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
         # Fireworks chat completions OpenAI-compatible API does not support
         # tool calls properly.
         llama_model = self.get_llama_model(model_obj.provider_resource_id)
+
         if llama_model:
             return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion(
                 self,
@@ -431,4 +433,5 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
             user=user,
         )
 
+        logger.debug(f"fireworks params: {params}")
         return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params)
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index 7b6e69df1..c5cfc632b 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -614,7 +614,7 @@ class OpenAIVectorStoreMixin(ABC):
             )
             vector_store_file_object.status = "completed"
         except Exception as e:
-            logger.error(f"Error attaching file to vector store: {e}")
+            logger.exception("Error attaching file to vector store")
             vector_store_file_object.status = "failed"
             vector_store_file_object.last_error = VectorStoreFileLastError(
                 code="server_error",
diff --git a/tests/common/mcp.py b/tests/common/mcp.py
index 775e38295..d05ac39c6 100644
--- a/tests/common/mcp.py
+++ b/tests/common/mcp.py
@@ -16,13 +16,10 @@ MCP_TOOLGROUP_ID = "mcp::localmcp"
 
 def default_tools():
     """Default tools for backward compatibility."""
-    from mcp import types
     from mcp.server.fastmcp import Context
 
-    async def greet_everyone(
-        url: str, ctx: Context
-    ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-        return [types.TextContent(type="text", text="Hello, world!")]
+    async def greet_everyone(url: str, ctx: Context) -> str:
+        return "Hello, world!"
 
     async def get_boiling_point(liquid_name: str, celsius: bool = True) -> int:
         """
@@ -45,7 +42,6 @@ def default_tools():
 
 def dependency_tools():
     """Tools with natural dependencies for multi-turn testing."""
-    from mcp import types
     from mcp.server.fastmcp import Context
 
     async def get_user_id(username: str, ctx: Context) -> str:
@@ -106,7 +102,7 @@ def dependency_tools():
         else:
             access = "no"
 
-        return [types.TextContent(type="text", text=access)]
+        return access
 
     async def get_experiment_id(experiment_name: str, ctx: Context) -> str:
         """
@@ -245,7 +241,6 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
     try:
         yield {"server_url": server_url}
     finally:
-        print("Telling SSE server to exit")
         server_instance.should_exit = True
         time.sleep(0.5)
 
@@ -269,4 +264,3 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
 
         AppStatus.should_exit = False
         AppStatus.should_exit_event = None
-        print("SSE server exited")
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index c91391f19..0b7132d71 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -270,7 +270,7 @@ def openai_client(client_with_models):
 
 @pytest.fixture(params=["openai_client", "client_with_models"])
 def compat_client(request, client_with_models):
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+    if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         # OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
         # to go via the Stack library client (which itself rewrites requests to be served inline),
         # we cannot do this.
diff --git a/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml b/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml
index 6db0dd970..353a64291 100644
--- a/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml
+++ b/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml
@@ -137,7 +137,7 @@ test_response_multi_turn_tool_execution:
             server_url: ""
         output: "yes"
       - case_id: "experiment_results_lookup"
-        input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
+        input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius."
         tools:
           - type: mcp
             server_label: "localmcp"
@@ -149,7 +149,7 @@ test_response_multi_turn_tool_execution_streaming:
   test_params:
     case:
       - case_id: "user_permissions_workflow"
-        input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
+        input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
         tools:
           - type: mcp
             server_label: "localmcp"
@@ -157,7 +157,7 @@ test_response_multi_turn_tool_execution_streaming:
         stream: true
         output: "no"
       - case_id: "experiment_analysis_streaming"
-        input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
+        input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process."
         tools:
           - type: mcp
             server_label: "localmcp"
diff --git a/tests/integration/non_ci/responses/test_responses.py b/tests/integration/non_ci/responses/test_responses.py
index 4f4f27d7f..39d00f328 100644
--- a/tests/integration/non_ci/responses/test_responses.py
+++ b/tests/integration/non_ci/responses/test_responses.py
@@ -363,6 +363,9 @@ def test_response_non_streaming_file_search_empty_vector_store(request, compat_c
     ids=case_id_generator,
 )
 def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case):
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server() as mcp_server_info:
         tools = case["tools"]
         for tool in tools:
@@ -485,8 +488,11 @@ def test_response_non_streaming_multi_turn_image(request, compat_client, text_mo
     responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"],
     ids=case_id_generator,
 )
-def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
     """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = case["tools"]
         # Replace the placeholder URL with the actual server URL
@@ -541,8 +547,11 @@ def test_response_non_streaming_multi_turn_tool_execution(request, compat_client
     responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"],
     ids=case_id_generator,
 )
-async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
     """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = case["tools"]
         # Replace the placeholder URL with the actual server URL
@@ -634,7 +643,7 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
         },
     ],
 )
-def test_response_text_format(request, compat_client, text_model_id, text_format):
+def test_response_text_format(compat_client, text_model_id, text_format):
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API text format is not yet supported in library client.")
 
@@ -653,7 +662,7 @@
 
 
 @pytest.fixture
-def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory):
+def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
     """Create a vector store with multiple files that have different attributes for filtering tests."""
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 3212a7568..94324ca84 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -475,9 +475,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -526,9 +523,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
     """Test OpenAI vector store attach files on creation."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create some files and attach them to the vector store
@@ -582,9 +576,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
     """Test OpenAI vector store list files."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -642,12 +633,13 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
     """Test OpenAI vector store list files with invalid vector store ID."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
+    if isinstance(compat_client, LlamaStackClient):
+        errors = ValueError
+    else:
+        errors = (BadRequestError, OpenAIBadRequestError)
 
-    with pytest.raises((BadRequestError, OpenAIBadRequestError)):
+    with pytest.raises(errors):
         compat_client.vector_stores.files.list(vector_store_id="abc123")
 
 
@@ -655,9 +647,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
     """Test OpenAI vector store retrieve file contents."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -686,8 +675,8 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
     )
 
     assert file_contents
-    assert file_contents.content[0]["type"] == "text"
-    assert file_contents.content[0]["text"] == test_content.decode("utf-8")
+    assert file_contents.content[0].type == "text"
+    assert file_contents.content[0].text == test_content.decode("utf-8")
     assert file_contents.filename == file_name
     assert file_contents.attributes == attributes
 
@@ -696,9 +685,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store delete file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -751,9 +737,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
     """Test OpenAI vector store delete file removes from vector store."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -792,9 +775,6 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store update file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store
@@ -840,9 +820,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
 
-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
 
     # Create a vector store with files