chore(tests): fix responses and vector_io tests

commit 1c2ece229c (parent 1721aafc1f)
Author: Ashwin Bharambe
Date: 2025-08-12 11:12:01 -07:00

12 changed files with 41 additions and 49 deletions


@@ -65,7 +65,7 @@ from llama_stack.providers.datatypes import HealthResponse, HealthStatus, Routin
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack.providers.utils.telemetry.tracing import get_current_span

-logger = get_logger(name=__name__, category="core")
+logger = get_logger(name=__name__, category="inference")

 class InferenceRouter(Inference):
@@ -854,4 +854,5 @@ class InferenceRouter(Inference):
             model=model.identifier,
             object="chat.completion",
         )
+        logger.debug(f"InferenceRouter.completion_response: {final_response}")
         await self.store.store_chat_completion(final_response, messages)
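Note on the category rename: log levels in llama-stack are managed per category (see the CATEGORIES hunk below), so moving this router from "core" to "inference" lets the new debug line be toggled independently of core logging. A minimal sketch of the idea using only the standard library; the real wiring behind get_logger is not shown here:

```python
# Sketch, not the project's implementation: per-category loggers let one
# subsystem (e.g. "inference") run at DEBUG while others stay at INFO.
import logging

logging.basicConfig(level=logging.INFO)

def get_logger_sketch(name: str, category: str) -> logging.Logger:
    return logging.getLogger(f"llama_stack.{category}.{name}")

# Raise just the "inference" category to DEBUG; children inherit the level.
logging.getLogger("llama_stack.inference").setLevel(logging.DEBUG)

log = get_logger_sketch(__name__, category="inference")
log.debug("InferenceRouter.completion_response: ...")  # visible only at DEBUG
```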


@@ -63,6 +63,8 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
     async def get_provider_impl(self, model_id: str) -> Any:
         model = await lookup_model(self, model_id)
+        if model.provider_id not in self.impls_by_provider_id:
+            raise ValueError(f"Provider {model.provider_id} not found in the routing table")
         return self.impls_by_provider_id[model.provider_id]

     async def register_model(
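The added guard turns what would have been a bare KeyError into a ValueError with a readable message. A small sketch of the before/after behavior; the names are stand-ins for the routing-table internals:

```python
# Stand-in for self.impls_by_provider_id on the routing table.
impls_by_provider_id: dict[str, object] = {"fireworks": object()}

def get_provider_impl(provider_id: str) -> object:
    # Mirrors the guard added above: fail loudly with context instead of
    # surfacing a bare KeyError to the caller.
    if provider_id not in impls_by_provider_id:
        raise ValueError(f"Provider {provider_id} not found in the routing table")
    return impls_by_provider_id[provider_id]

get_provider_impl("fireworks")   # returns the registered impl
# get_provider_impl("missing")   # -> ValueError: Provider missing not found in the routing table
```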


@@ -32,6 +32,7 @@ CATEGORIES = [
     "tools",
     "client",
     "telemetry",
+    "openai_responses",
 ]

 # Initialize category levels with default level


@@ -236,6 +236,7 @@ class ChatFormat:
                     arguments_json=json.dumps(tool_arguments),
                 )
             )
+            content = ""

         return RawMessage(
             role="assistant",


@@ -488,8 +488,12 @@ class OpenAIResponsesImpl:
         # Convert collected chunks to complete response
         if chat_response_tool_calls:
             tool_calls = [chat_response_tool_calls[i] for i in sorted(chat_response_tool_calls.keys())]
+            # when there are tool calls, we need to clear the content
+            chat_response_content = []
         else:
             tool_calls = None

         assistant_message = OpenAIAssistantMessageParam(
             content="".join(chat_response_content),
             tool_calls=tool_calls,
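Background for this fix (and the matching `content = ""` change in ChatFormat above): under OpenAI chat-completion semantics, an assistant turn that carries tool_calls should not also echo the partial text deltas accumulated while streaming. A hedged sketch of the accumulation rule, with illustrative names rather than the actual implementation:

```python
def build_assistant_turn(content_parts: list[str], tool_calls_by_index: dict[int, dict]) -> dict:
    if tool_calls_by_index:
        # Order tool calls by their streamed chunk index and drop any text
        # deltas; a tool-call turn carries no assistant text.
        tool_calls = [tool_calls_by_index[i] for i in sorted(tool_calls_by_index)]
        content_parts = []
    else:
        tool_calls = None
    return {"role": "assistant", "content": "".join(content_parts), "tool_calls": tool_calls}
```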


@@ -235,6 +235,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
         llama_model = self.get_llama_model(request.model)
         if isinstance(request, ChatCompletionRequest):
+            # TODO: tools are never added to the request, so we need to add them here
             if media_present or not llama_model:
                 input_dict["messages"] = [
                     await convert_message_to_openai_dict(m, download=True) for m in request.messages
@@ -378,6 +379,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
         # Fireworks chat completions OpenAI-compatible API does not support
         # tool calls properly.
         llama_model = self.get_llama_model(model_obj.provider_resource_id)
         if llama_model:
             return await OpenAIChatCompletionToLlamaStackMixin.openai_chat_completion(
                 self,
@@ -431,4 +433,5 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProv
             user=user,
         )
+        logger.debug(f"fireworks params: {params}")
         return await self._get_openai_client().chat.completions.create(model=model_obj.provider_resource_id, **params)


@@ -614,7 +614,7 @@ class OpenAIVectorStoreMixin(ABC):
             )
             vector_store_file_object.status = "completed"
         except Exception as e:
-            logger.error(f"Error attaching file to vector store: {e}")
+            logger.exception("Error attaching file to vector store")
             vector_store_file_object.status = "failed"
             vector_store_file_object.last_error = VectorStoreFileLastError(
                 code="server_error",


@@ -16,13 +16,10 @@ MCP_TOOLGROUP_ID = "mcp::localmcp"
 def default_tools():
     """Default tools for backward compatibility."""
-    from mcp import types
     from mcp.server.fastmcp import Context

-    async def greet_everyone(
-        url: str, ctx: Context
-    ) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-        return [types.TextContent(type="text", text="Hello, world!")]
+    async def greet_everyone(url: str, ctx: Context) -> str:
+        return "Hello, world!"

     async def get_boiling_point(liquid_name: str, celsius: bool = True) -> int:
         """
@@ -45,7 +42,6 @@ def default_tools():
 def dependency_tools():
     """Tools with natural dependencies for multi-turn testing."""
-    from mcp import types
     from mcp.server.fastmcp import Context

     async def get_user_id(username: str, ctx: Context) -> str:
@@ -106,7 +102,7 @@ def dependency_tools():
         else:
             access = "no"
-        return [types.TextContent(type="text", text=access)]
+        return access

     async def get_experiment_id(experiment_name: str, ctx: Context) -> str:
         """
@@ -245,7 +241,6 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
         try:
             yield {"server_url": server_url}
         finally:
-            print("Telling SSE server to exit")
             server_instance.should_exit = True
             time.sleep(0.5)
@@ -269,4 +264,3 @@ def make_mcp_server(required_auth_token: str | None = None, tools: dict[str, Cal
     AppStatus.should_exit = False
     AppStatus.should_exit_event = None
-    print("SSE server exited")


@@ -270,7 +270,7 @@ def openai_client(client_with_models):
 @pytest.fixture(params=["openai_client", "client_with_models"])
 def compat_client(request, client_with_models):
-    if isinstance(client_with_models, LlamaStackAsLibraryClient):
+    if request.param == "openai_client" and isinstance(client_with_models, LlamaStackAsLibraryClient):
         # OpenAI client expects a server, so unless we also rewrite OpenAI client's requests
         # to go via the Stack library client (which itself rewrites requests to be served inline),
         # we cannot do this.
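The old guard skipped both parametrizations whenever the stack ran as a library client; the fix skips only the `openai_client` flavor, which genuinely needs an HTTP server. Sketch of the pattern with hypothetical helpers:

```python
import pytest

@pytest.fixture(params=["openai_client", "client_with_models"])
def compat_client_sketch(request):
    # Only the OpenAI-client parametrization requires a real server; the
    # library-client parametrization can still run in-process.
    if request.param == "openai_client" and running_as_library_client():  # hypothetical predicate
        pytest.skip("OpenAI client expects a server")
    return build_client(request.param)  # hypothetical factory
```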


@@ -137,7 +137,7 @@ test_response_multi_turn_tool_execution:
             server_url: "<FILLED_BY_TEST_RUNNER>"
         output: "yes"
       - case_id: "experiment_results_lookup"
-        input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me what you found."
+        input: "I need to get the results for the 'boiling_point' experiment. First, get the experiment ID for 'boiling_point', then use that ID to get the experiment results. Tell me the boiling point in Celsius."
         tools:
           - type: mcp
             server_label: "localmcp"
@@ -149,7 +149,7 @@ test_response_multi_turn_tool_execution_streaming:
   test_params:
     case:
       - case_id: "user_permissions_workflow"
-        input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step."
+        input: "Help me with this security check: First, get the user ID for 'charlie', then get the permissions for that user ID, and finally check if that user can access 'secret_file.txt'. Stream your progress as you work through each step. Return only one tool call per step. Summarize the final result with a single 'yes' or 'no' response."
         tools:
           - type: mcp
             server_label: "localmcp"
@@ -157,7 +157,7 @@ test_response_multi_turn_tool_execution_streaming:
         stream: true
         output: "no"
       - case_id: "experiment_analysis_streaming"
-        input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Please stream your analysis process."
+        input: "I need a complete analysis: First, get the experiment ID for 'chemical_reaction', then get the results for that experiment, and tell me if the yield was above 80%. Return only one tool call per step. Please stream your analysis process."
         tools:
           - type: mcp
             server_label: "localmcp"


@@ -363,6 +363,9 @@ def test_response_non_streaming_file_search_empty_vector_store(request, compat_c
     ids=case_id_generator,
 )
 def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case):
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server() as mcp_server_info:
         tools = case["tools"]
         for tool in tools:
@@ -485,8 +488,11 @@ def test_response_non_streaming_multi_turn_image(request, compat_client, text_mo
     responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"],
     ids=case_id_generator,
 )
-def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_non_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
     """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = case["tools"]
         # Replace the placeholder URL with the actual server URL
@@ -541,8 +547,11 @@ def test_response_non_streaming_multi_turn_tool_execution(request, compat_client
     responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"],
     ids=case_id_generator,
 )
-async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case):
+def test_response_streaming_multi_turn_tool_execution(compat_client, text_model_id, case):
     """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence."""
+    if not isinstance(compat_client, LlamaStackAsLibraryClient):
+        pytest.skip("in-process MCP server is only supported in library client")
+
     with make_mcp_server(tools=dependency_tools()) as mcp_server_info:
         tools = case["tools"]
         # Replace the placeholder URL with the actual server URL
@@ -634,7 +643,7 @@ async def test_response_streaming_multi_turn_tool_execution(request, compat_clie
         },
     ],
 )
-def test_response_text_format(request, compat_client, text_model_id, text_format):
+def test_response_text_format(compat_client, text_model_id, text_format):
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API text format is not yet supported in library client.")
@@ -653,7 +662,7 @@ def test_response_text_format(request, compat_client, text_model_id, text_format
 @pytest.fixture
-def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory):
+def vector_store_with_filtered_files(compat_client, text_model_id, tmp_path_factory):
     """Create a vector store with multiple files that have different attributes for filtering tests."""
     if isinstance(compat_client, LlamaStackAsLibraryClient):
         pytest.skip("Responses API file search is not yet supported in library client.")


@@ -475,9 +475,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -526,9 +523,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
     """Test OpenAI vector store attach files on creation."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create some files and attach them to the vector store
@@ -582,9 +576,6 @@ def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_
     """Test OpenAI vector store list files."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -642,12 +633,13 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
     """Test OpenAI vector store list files with invalid vector store ID."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores
+    if isinstance(compat_client, LlamaStackClient):
+        errors = ValueError
+    else:
+        errors = (BadRequestError, OpenAIBadRequestError)

-    with pytest.raises((BadRequestError, OpenAIBadRequestError)):
+    with pytest.raises(errors):
         compat_client.vector_stores.files.list(vector_store_id="abc123")
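`pytest.raises` accepts either a single exception type or a tuple of types, which is what lets the test pick the expected error per client flavor. Sketch; the helper name is illustrative:

```python
def expected_list_errors(client):
    # Library client surfaces a plain ValueError; the HTTP clients raise the
    # BadRequestError types imported at the top of this test module.
    if isinstance(client, LlamaStackClient):
        return ValueError
    return (BadRequestError, OpenAIBadRequestError)

# usage:
# with pytest.raises(expected_list_errors(compat_client)):
#     compat_client.vector_stores.files.list(vector_store_id="abc123")
```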
@@ -655,9 +647,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
     """Test OpenAI vector store retrieve file contents."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -686,8 +675,8 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
     )

     assert file_contents
-    assert file_contents.content[0]["type"] == "text"
-    assert file_contents.content[0]["text"] == test_content.decode("utf-8")
+    assert file_contents.content[0].type == "text"
+    assert file_contents.content[0].text == test_content.decode("utf-8")
     assert file_contents.filename == file_name
     assert file_contents.attributes == attributes
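The assertion change reflects the client now returning typed content objects rather than raw dicts, so fields are read as attributes. Self-contained illustration with a stand-in type:

```python
from dataclasses import dataclass

@dataclass
class TextChunkStandIn:  # stand-in for the SDK's typed content entry
    type: str
    text: str

chunk = TextChunkStandIn(type="text", text="hello world")
assert chunk.type == "text"        # attribute access (new style)
assert chunk.text == "hello world"  # replaces chunk["text"] (old style)
```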
@@ -696,9 +685,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store delete file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -751,9 +737,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
     """Test OpenAI vector store delete file removes from vector store."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -792,9 +775,6 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
     """Test OpenAI vector store update file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store
@@ -840,9 +820,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

-    if isinstance(compat_client_with_empty_stores, LlamaStackClient):
-        pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
-
     compat_client = compat_client_with_empty_stores

     # Create a vector store with files