diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index a2a56c003..a38d4971a 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -53,7 +53,7 @@ jobs: # Get test directories dynamically, excluding non-test directories # NOTE: we are excluding post_training since the tests take too long TEST_TYPES=$(find tests/integration -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | - grep -Ev "^(__pycache__|fixtures|test_cases|recordings|post_training)$" | + grep -Ev "^(__pycache__|fixtures|test_cases|recordings|non_ci|post_training)$" | sort | jq -R -s -c 'split("\n")[:-1]') echo "test-types=$TEST_TYPES" >> $GITHUB_OUTPUT diff --git a/tests/client-sdk/post_training/test_supervied_fine_tuning.py b/tests/client-sdk/post_training/test_supervied_fine_tuning.py deleted file mode 100644 index 232510478..000000000 --- a/tests/client-sdk/post_training/test_supervied_fine_tuning.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import pytest - -POST_TRAINING_PROVIDER_TYPES = ["remote::nvidia"] - - -@pytest.mark.integration -@pytest.fixture(scope="session") -def post_training_provider_available(llama_stack_client): - providers = llama_stack_client.providers.list() - post_training_providers = [p for p in providers if p.provider_type in POST_TRAINING_PROVIDER_TYPES] - return len(post_training_providers) > 0 - - -@pytest.mark.integration -def test_post_training_provider_registration(llama_stack_client, post_training_provider_available): - """Check if post_training is in the api list. - This is a sanity check to ensure the provider is registered.""" - if not post_training_provider_available: - pytest.skip("post training provider not available") - - providers = llama_stack_client.providers.list() - post_training_providers = [p for p in providers if p.provider_type in POST_TRAINING_PROVIDER_TYPES] - assert len(post_training_providers) > 0 - - -@pytest.mark.integration -def test_get_training_jobs(llama_stack_client, post_training_provider_available): - """Test listing all training jobs.""" - if not post_training_provider_available: - pytest.skip("post training provider not available") - - jobs = llama_stack_client.post_training.get_training_jobs() - assert isinstance(jobs, dict) - assert "data" in jobs - assert isinstance(jobs["data"], list) - - -@pytest.mark.integration -def test_get_training_job_status(llama_stack_client, post_training_provider_available): - """Test getting status of a specific training job.""" - if not post_training_provider_available: - pytest.skip("post training provider not available") - - jobs = llama_stack_client.post_training.get_training_jobs() - if not jobs["data"]: - pytest.skip("No training jobs available to check status") - - job_uuid = jobs["data"][0]["job_uuid"] - job_status = llama_stack_client.post_training.get_training_job_status(job_uuid=job_uuid) - - assert job_status is not None - assert "job_uuid" in job_status - assert "status" in job_status - assert job_status["job_uuid"] == job_uuid diff --git a/tests/client-sdk/post_training/__init__.py b/tests/integration/non_ci/responses/__init__.py similarity index 100% rename from tests/client-sdk/post_training/__init__.py rename to tests/integration/non_ci/responses/__init__.py diff --git a/tests/verifications/__init__.py b/tests/integration/non_ci/responses/fixtures/__init__.py similarity index 100% rename from tests/verifications/__init__.py rename to tests/integration/non_ci/responses/fixtures/__init__.py diff --git a/tests/verifications/openai_api/fixtures/fixtures.py b/tests/integration/non_ci/responses/fixtures/fixtures.py similarity index 91% rename from tests/verifications/openai_api/fixtures/fixtures.py rename to tests/integration/non_ci/responses/fixtures/fixtures.py index a3be7e402..2069010ad 100644 --- a/tests/verifications/openai_api/fixtures/fixtures.py +++ b/tests/integration/non_ci/responses/fixtures/fixtures.py @@ -56,16 +56,6 @@ def case_id_generator(case): return None -def should_skip_test(verification_config, provider, model, test_name_base): - """Check if a test should be skipped based on config exclusions.""" - provider_config = verification_config.get("providers", {}).get(provider) - if not provider_config: - return False # No config for provider, don't skip - - exclusions = provider_config.get("test_exclusions", {}).get(model, []) - return test_name_base in exclusions - - # Helper to get the base test name from the request object def get_base_test_name(request): return request.node.originalname diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_1.jpg b/tests/integration/non_ci/responses/fixtures/images/vision_test_1.jpg similarity index 100% rename from tests/verifications/openai_api/fixtures/images/vision_test_1.jpg rename to tests/integration/non_ci/responses/fixtures/images/vision_test_1.jpg diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_2.jpg b/tests/integration/non_ci/responses/fixtures/images/vision_test_2.jpg similarity index 100% rename from tests/verifications/openai_api/fixtures/images/vision_test_2.jpg rename to tests/integration/non_ci/responses/fixtures/images/vision_test_2.jpg diff --git a/tests/verifications/openai_api/fixtures/images/vision_test_3.jpg b/tests/integration/non_ci/responses/fixtures/images/vision_test_3.jpg similarity index 100% rename from tests/verifications/openai_api/fixtures/images/vision_test_3.jpg rename to tests/integration/non_ci/responses/fixtures/images/vision_test_3.jpg diff --git a/tests/verifications/openai_api/fixtures/load.py b/tests/integration/non_ci/responses/fixtures/load.py similarity index 100% rename from tests/verifications/openai_api/fixtures/load.py rename to tests/integration/non_ci/responses/fixtures/load.py diff --git a/tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf b/tests/integration/non_ci/responses/fixtures/pdfs/llama_stack_and_models.pdf similarity index 100% rename from tests/verifications/openai_api/fixtures/pdfs/llama_stack_and_models.pdf rename to tests/integration/non_ci/responses/fixtures/pdfs/llama_stack_and_models.pdf diff --git a/tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml b/tests/integration/non_ci/responses/fixtures/test_cases/chat_completion.yaml similarity index 100% rename from tests/verifications/openai_api/fixtures/test_cases/chat_completion.yaml rename to tests/integration/non_ci/responses/fixtures/test_cases/chat_completion.yaml diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml similarity index 100% rename from tests/verifications/openai_api/fixtures/test_cases/responses.yaml rename to tests/integration/non_ci/responses/fixtures/test_cases/responses.yaml diff --git a/tests/verifications/openai_api/test_responses.py b/tests/integration/non_ci/responses/test_responses.py similarity index 77% rename from tests/verifications/openai_api/test_responses.py rename to tests/integration/non_ci/responses/test_responses.py index e312de6aa..4f4f27d7f 100644 --- a/tests/verifications/openai_api/test_responses.py +++ b/tests/integration/non_ci/responses/test_responses.py @@ -15,12 +15,9 @@ import pytest from llama_stack import LlamaStackAsLibraryClient from llama_stack.core.datatypes import AuthenticationRequiredError from tests.common.mcp import dependency_tools, make_mcp_server -from tests.verifications.openai_api.fixtures.fixtures import ( - case_id_generator, - get_base_test_name, - should_skip_test, -) -from tests.verifications.openai_api.fixtures.load import load_test_cases + +from .fixtures.fixtures import case_id_generator +from .fixtures.load import load_test_cases responses_test_cases = load_test_cases("responses") @@ -55,13 +52,9 @@ def _upload_file(openai_client, name, file_path): responses_test_cases["test_response_basic"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_basic(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.responses.create( - model=model, +def test_response_non_streaming_basic(request, compat_client, text_model_id, case): + response = compat_client.responses.create( + model=text_model_id, input=case["input"], stream=False, ) @@ -69,11 +62,13 @@ def test_response_non_streaming_basic(request, openai_client, model, provider, v assert len(output_text) > 0 assert case["output"].lower() in output_text - retrieved_response = openai_client.responses.retrieve(response_id=response.id) + retrieved_response = compat_client.responses.retrieve(response_id=response.id) assert retrieved_response.output_text == response.output_text - next_response = openai_client.responses.create( - model=model, input="Repeat your previous response in all caps.", previous_response_id=response.id + next_response = compat_client.responses.create( + model=text_model_id, + input="Repeat your previous response in all caps.", + previous_response_id=response.id, ) next_output_text = next_response.output_text.strip() assert case["output"].upper() in next_output_text @@ -84,15 +79,11 @@ def test_response_non_streaming_basic(request, openai_client, model, provider, v responses_test_cases["test_response_basic"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_streaming_basic(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - +def test_response_streaming_basic(request, compat_client, text_model_id, case): import time - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=case["input"], stream=True, ) @@ -138,7 +129,7 @@ def test_response_streaming_basic(request, openai_client, model, provider, verif assert created_index < completed_index, "response.created should come before response.completed" # Verify stored response matches streamed response - retrieved_response = openai_client.responses.retrieve(response_id=response_id) + retrieved_response = compat_client.responses.retrieve(response_id=response_id) final_event = events[-1] assert retrieved_response.output_text == final_event.response.output_text @@ -148,16 +139,12 @@ def test_response_streaming_basic(request, openai_client, model, provider, verif responses_test_cases["test_response_basic"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_streaming_incremental_content(request, openai_client, model, provider, verification_config, case): +def test_response_streaming_incremental_content(request, compat_client, text_model_id, case): """Test that streaming actually delivers content incrementally, not just at the end.""" - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - import time - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=case["input"], stream=True, ) @@ -241,15 +228,11 @@ def test_response_streaming_incremental_content(request, openai_client, model, p responses_test_cases["test_response_multi_turn"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_multi_turn(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - +def test_response_non_streaming_multi_turn(request, compat_client, text_model_id, case): previous_response_id = None for turn in case["turns"]: - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=turn["input"], previous_response_id=previous_response_id, tools=turn["tools"] if "tools" in turn else None, @@ -264,13 +247,9 @@ def test_response_non_streaming_multi_turn(request, openai_client, model, provid responses_test_cases["test_response_web_search"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_web_search(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.responses.create( - model=model, +def test_response_non_streaming_web_search(request, compat_client, text_model_id, case): + response = compat_client.responses.create( + model=text_model_id, input=case["input"], tools=case["tools"], stream=False, @@ -290,17 +269,11 @@ def test_response_non_streaming_web_search(request, openai_client, model, provid responses_test_cases["test_response_file_search"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_file_search( - request, openai_client, model, provider, verification_config, tmp_path, case -): - if isinstance(openai_client, LlamaStackAsLibraryClient): +def test_response_non_streaming_file_search(request, compat_client, text_model_id, tmp_path, case): + if isinstance(compat_client, LlamaStackAsLibraryClient): pytest.skip("Responses API file search is not yet supported in library client.") - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - vector_store = _new_vector_store(openai_client, "test_vector_store") + vector_store = _new_vector_store(compat_client, "test_vector_store") if "file_content" in case: file_name = "test_response_non_streaming_file_search.txt" @@ -312,10 +285,10 @@ def test_response_non_streaming_file_search( else: raise ValueError(f"No file content or path provided for case {case['case_id']}") - file_response = _upload_file(openai_client, file_name, file_path) + file_response = _upload_file(compat_client, file_name, file_path) # Attach our file to the vector store - file_attach_response = openai_client.vector_stores.files.create( + file_attach_response = compat_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, ) @@ -323,7 +296,7 @@ def test_response_non_streaming_file_search( # Wait for the file to be attached while file_attach_response.status == "in_progress": time.sleep(0.1) - file_attach_response = openai_client.vector_stores.files.retrieve( + file_attach_response = compat_client.vector_stores.files.retrieve( vector_store_id=vector_store.id, file_id=file_response.id, ) @@ -337,8 +310,8 @@ def test_response_non_streaming_file_search( tool["vector_store_ids"] = [vector_store.id] # Create the response request, which should query our vector store - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=case["input"], tools=tools, stream=False, @@ -358,21 +331,15 @@ def test_response_non_streaming_file_search( assert case["output"].lower() in response.output_text.lower().strip() -def test_response_non_streaming_file_search_empty_vector_store( - request, openai_client, model, provider, verification_config -): - if isinstance(openai_client, LlamaStackAsLibraryClient): +def test_response_non_streaming_file_search_empty_vector_store(request, compat_client, text_model_id): + if isinstance(compat_client, LlamaStackAsLibraryClient): pytest.skip("Responses API file search is not yet supported in library client.") - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - vector_store = _new_vector_store(openai_client, "test_vector_store") + vector_store = _new_vector_store(compat_client, "test_vector_store") # Create the response request, which should query our vector store - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="How many experts does the Llama 4 Maverick model have?", tools=[{"type": "file_search", "vector_store_ids": [vector_store.id]}], stream=False, @@ -395,19 +362,15 @@ def test_response_non_streaming_file_search_empty_vector_store( responses_test_cases["test_response_mcp_tool"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_mcp_tool(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - +def test_response_non_streaming_mcp_tool(request, compat_client, text_model_id, case): with make_mcp_server() as mcp_server_info: tools = case["tools"] for tool in tools: if tool["type"] == "mcp": tool["server_url"] = mcp_server_info["server_url"] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=case["input"], tools=tools, stream=False, @@ -418,7 +381,7 @@ def test_response_non_streaming_mcp_tool(request, openai_client, model, provider assert list_tools.type == "mcp_list_tools" assert list_tools.server_label == "localmcp" assert len(list_tools.tools) == 2 - assert {t["name"] for t in list_tools.tools} == {"get_boiling_point", "greet_everyone"} + assert {t.name for t in list_tools.tools} == {"get_boiling_point", "greet_everyone"} call = response.output[1] assert call.type == "mcp_call" @@ -440,12 +403,12 @@ def test_response_non_streaming_mcp_tool(request, openai_client, model, provider exc_type = ( AuthenticationRequiredError - if isinstance(openai_client, LlamaStackAsLibraryClient) + if isinstance(compat_client, LlamaStackAsLibraryClient) else (httpx.HTTPStatusError, openai.AuthenticationError) ) with pytest.raises(exc_type): - openai_client.responses.create( - model=model, + compat_client.responses.create( + model=text_model_id, input=case["input"], tools=tools, stream=False, @@ -456,8 +419,8 @@ def test_response_non_streaming_mcp_tool(request, openai_client, model, provider tool["server_url"] = mcp_server_info["server_url"] tool["headers"] = {"Authorization": "Bearer test-token"} - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=case["input"], tools=tools, stream=False, @@ -470,13 +433,9 @@ def test_response_non_streaming_mcp_tool(request, openai_client, model, provider responses_test_cases["test_response_custom_tool"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_custom_tool(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.responses.create( - model=model, +def test_response_non_streaming_custom_tool(request, compat_client, text_model_id, case): + response = compat_client.responses.create( + model=text_model_id, input=case["input"], tools=case["tools"], stream=False, @@ -492,13 +451,9 @@ def test_response_non_streaming_custom_tool(request, openai_client, model, provi responses_test_cases["test_response_image"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_image(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.responses.create( - model=model, +def test_response_non_streaming_image(request, compat_client, text_model_id, case): + response = compat_client.responses.create( + model=text_model_id, input=case["input"], stream=False, ) @@ -511,15 +466,11 @@ def test_response_non_streaming_image(request, openai_client, model, provider, v responses_test_cases["test_response_multi_turn_image"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_multi_turn_image(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - +def test_response_non_streaming_multi_turn_image(request, compat_client, text_model_id, case): previous_response_id = None for turn in case["turns"]: - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input=turn["input"], previous_response_id=previous_response_id, tools=turn["tools"] if "tools" in turn else None, @@ -534,14 +485,8 @@ def test_response_non_streaming_multi_turn_image(request, openai_client, model, responses_test_cases["test_response_multi_turn_tool_execution"]["test_params"]["case"], ids=case_id_generator, ) -def test_response_non_streaming_multi_turn_tool_execution( - request, openai_client, model, provider, verification_config, case -): +def test_response_non_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case): """Test multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = case["tools"] # Replace the placeholder URL with the actual server URL @@ -549,14 +494,15 @@ def test_response_non_streaming_multi_turn_tool_execution( if tool["type"] == "mcp" and tool["server_url"] == "": tool["server_url"] = mcp_server_info["server_url"] - response = openai_client.responses.create( + response = compat_client.responses.create( input=case["input"], - model=model, + model=text_model_id, tools=tools, ) # Verify we have MCP tool calls in the output mcp_list_tools = [output for output in response.output if output.type == "mcp_list_tools"] + mcp_calls = [output for output in response.output if output.type == "mcp_call"] message_outputs = [output for output in response.output if output.type == "message"] @@ -571,7 +517,7 @@ def test_response_non_streaming_multi_turn_tool_execution( "get_experiment_id", "get_experiment_results", } - assert {t["name"] for t in mcp_list_tools[0].tools} == expected_tool_names + assert {t.name for t in mcp_list_tools[0].tools} == expected_tool_names assert len(mcp_calls) >= 1, f"Expected at least 1 mcp_call, got {len(mcp_calls)}" for mcp_call in mcp_calls: @@ -595,14 +541,8 @@ def test_response_non_streaming_multi_turn_tool_execution( responses_test_cases["test_response_multi_turn_tool_execution_streaming"]["test_params"]["case"], ids=case_id_generator, ) -async def test_response_streaming_multi_turn_tool_execution( - request, openai_client, model, provider, verification_config, case -): +async def test_response_streaming_multi_turn_tool_execution(request, compat_client, text_model_id, case): """Test streaming multi-turn tool execution where multiple MCP tool calls are performed in sequence.""" - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - with make_mcp_server(tools=dependency_tools()) as mcp_server_info: tools = case["tools"] # Replace the placeholder URL with the actual server URL @@ -610,15 +550,15 @@ async def test_response_streaming_multi_turn_tool_execution( if tool["type"] == "mcp" and tool["server_url"] == "": tool["server_url"] = mcp_server_info["server_url"] - stream = openai_client.responses.create( + stream = compat_client.responses.create( input=case["input"], - model=model, + model=text_model_id, tools=tools, stream=True, ) chunks = [] - async for chunk in stream: + for chunk in stream: chunks.append(chunk) # Should have at least response.created and response.completed @@ -653,7 +593,7 @@ async def test_response_streaming_multi_turn_tool_execution( "get_experiment_id", "get_experiment_results", } - assert {t["name"] for t in mcp_list_tools[0].tools} == expected_tool_names + assert {t.name for t in mcp_list_tools[0].tools} == expected_tool_names # Should have at least 1 MCP call (the model should call at least one tool) assert len(mcp_calls) >= 1, f"Expected at least 1 mcp_call, got {len(mcp_calls)}" @@ -694,17 +634,13 @@ async def test_response_streaming_multi_turn_tool_execution( }, ], ) -def test_response_text_format(request, openai_client, model, provider, verification_config, text_format): - if isinstance(openai_client, LlamaStackAsLibraryClient): +def test_response_text_format(request, compat_client, text_model_id, text_format): + if isinstance(compat_client, LlamaStackAsLibraryClient): pytest.skip("Responses API text format is not yet supported in library client.") - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - stream = False - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="What is the capital of France?", stream=stream, text={"format": text_format}, @@ -717,16 +653,12 @@ def test_response_text_format(request, openai_client, model, provider, verificat @pytest.fixture -def vector_store_with_filtered_files(request, openai_client, model, provider, verification_config, tmp_path_factory): +def vector_store_with_filtered_files(request, compat_client, text_model_id, tmp_path_factory): """Create a vector store with multiple files that have different attributes for filtering tests.""" - if isinstance(openai_client, LlamaStackAsLibraryClient): + if isinstance(compat_client, LlamaStackAsLibraryClient): pytest.skip("Responses API file search is not yet supported in library client.") - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - vector_store = _new_vector_store(openai_client, "test_vector_store_with_filters") + vector_store = _new_vector_store(compat_client, "test_vector_store_with_filters") tmp_path = tmp_path_factory.mktemp("filter_test_files") # Create multiple files with different attributes @@ -776,18 +708,18 @@ def vector_store_with_filtered_files(request, openai_client, model, provider, ve file_path.write_text(file_data["content"]) # Upload file - file_response = _upload_file(openai_client, file_data["name"], str(file_path)) + file_response = _upload_file(compat_client, file_data["name"], str(file_path)) file_ids.append(file_response.id) # Attach file to vector store with attributes - file_attach_response = openai_client.vector_stores.files.create( + file_attach_response = compat_client.vector_stores.files.create( vector_store_id=vector_store.id, file_id=file_response.id, attributes=file_data["attributes"] ) # Wait for attachment while file_attach_response.status == "in_progress": time.sleep(0.1) - file_attach_response = openai_client.vector_stores.files.retrieve( + file_attach_response = compat_client.vector_stores.files.retrieve( vector_store_id=vector_store.id, file_id=file_response.id, ) @@ -797,17 +729,17 @@ def vector_store_with_filtered_files(request, openai_client, model, provider, ve # Cleanup: delete vector store and files try: - openai_client.vector_stores.delete(vector_store_id=vector_store.id) + compat_client.vector_stores.delete(vector_store_id=vector_store.id) for file_id in file_ids: try: - openai_client.files.delete(file_id=file_id) + compat_client.files.delete(file_id=file_id) except Exception: pass # File might already be deleted except Exception: pass # Best effort cleanup -def test_response_file_search_filter_by_region(openai_client, model, vector_store_with_filtered_files): +def test_response_file_search_filter_by_region(compat_client, text_model_id, vector_store_with_filtered_files): """Test file search with region equality filter.""" tools = [ { @@ -817,8 +749,8 @@ def test_response_file_search_filter_by_region(openai_client, model, vector_stor } ] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="What are the updates from the US region?", tools=tools, stream=False, @@ -838,7 +770,7 @@ def test_response_file_search_filter_by_region(openai_client, model, vector_stor assert "asia" not in result.text.lower() -def test_response_file_search_filter_by_category(openai_client, model, vector_store_with_filtered_files): +def test_response_file_search_filter_by_category(compat_client, text_model_id, vector_store_with_filtered_files): """Test file search with category equality filter.""" tools = [ { @@ -848,8 +780,8 @@ def test_response_file_search_filter_by_category(openai_client, model, vector_st } ] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="Show me all marketing reports", tools=tools, stream=False, @@ -868,7 +800,7 @@ def test_response_file_search_filter_by_category(openai_client, model, vector_st assert "revenue figures" not in result.text.lower() -def test_response_file_search_filter_by_date_range(openai_client, model, vector_store_with_filtered_files): +def test_response_file_search_filter_by_date_range(compat_client, text_model_id, vector_store_with_filtered_files): """Test file search with date range filter using compound AND.""" tools = [ { @@ -892,8 +824,8 @@ def test_response_file_search_filter_by_date_range(openai_client, model, vector_ } ] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="What happened in Q1 2023?", tools=tools, stream=False, @@ -911,7 +843,7 @@ def test_response_file_search_filter_by_date_range(openai_client, model, vector_ assert "q3" not in result.text.lower() -def test_response_file_search_filter_compound_and(openai_client, model, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_and(compat_client, text_model_id, vector_store_with_filtered_files): """Test file search with compound AND filter (region AND category).""" tools = [ { @@ -927,8 +859,8 @@ def test_response_file_search_filter_compound_and(openai_client, model, vector_s } ] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="What are the engineering updates from the US?", tools=tools, stream=False, @@ -947,7 +879,7 @@ def test_response_file_search_filter_compound_and(openai_client, model, vector_s assert "promotional" not in result.text.lower() and "revenue" not in result.text.lower() -def test_response_file_search_filter_compound_or(openai_client, model, vector_store_with_filtered_files): +def test_response_file_search_filter_compound_or(compat_client, text_model_id, vector_store_with_filtered_files): """Test file search with compound OR filter (marketing OR sales).""" tools = [ { @@ -963,8 +895,8 @@ def test_response_file_search_filter_compound_or(openai_client, model, vector_st } ] - response = openai_client.responses.create( - model=model, + response = compat_client.responses.create( + model=text_model_id, input="Show me marketing and sales documents", tools=tools, stream=False, diff --git a/tests/verifications/README.md b/tests/verifications/README.md deleted file mode 100644 index b6c332cac..000000000 --- a/tests/verifications/README.md +++ /dev/null @@ -1,79 +0,0 @@ -# Llama Stack Verifications - -Llama Stack Verifications provide standardized test suites to ensure API compatibility and behavior consistency across different LLM providers. These tests help verify that different models and providers implement the expected interfaces and behaviors correctly. - -## Overview - -This framework allows you to run the same set of verification tests against different LLM providers' OpenAI-compatible endpoints (Fireworks, Together, Groq, Cerebras, etc., and OpenAI itself) to ensure they meet the expected behavior and interface standards. - -## Features - -The verification suite currently tests the following in both streaming and non-streaming modes: - -- Basic chat completions -- Image input capabilities -- Structured JSON output formatting -- Tool calling functionality - -## Report - -The lastest report can be found at [REPORT.md](REPORT.md). - -To update the report, ensure you have the API keys set, -```bash -export OPENAI_API_KEY= -export FIREWORKS_API_KEY= -export TOGETHER_API_KEY= -``` -then run -```bash -uv run python tests/verifications/generate_report.py --run-tests -``` - -## Running Tests - -To run the verification tests, use pytest with the following parameters: - -```bash -cd llama-stack -pytest tests/verifications/openai_api --provider= -``` - -Example: -```bash -# Run all tests -pytest tests/verifications/openai_api --provider=together - -# Only run tests with Llama 4 models -pytest tests/verifications/openai_api --provider=together -k 'Llama-4' -``` - -### Parameters - -- `--provider`: The provider name (openai, fireworks, together, groq, cerebras, etc.) -- `--base-url`: The base URL for the provider's API (optional - defaults to the standard URL for the specified provider) -- `--api-key`: Your API key for the provider (optional - defaults to the standard API_KEY name for the specified provider) - -## Supported Providers - -The verification suite supports any provider with an OpenAI compatible endpoint. - -See `tests/verifications/conf/` for the list of supported providers. - -To run on a new provider, simply add a new yaml file to the `conf/` directory with the provider config. See `tests/verifications/conf/together.yaml` for an example. - -## Adding New Test Cases - -To add new test cases, create appropriate JSON files in the `openai_api/fixtures/test_cases/` directory following the existing patterns. - - -## Structure - -- `__init__.py` - Marks the directory as a Python package -- `conf/` - Provider-specific configuration files -- `openai_api/` - Tests specific to OpenAI-compatible APIs - - `fixtures/` - Test fixtures and utilities - - `fixtures.py` - Provider-specific fixtures - - `load.py` - Utilities for loading test cases - - `test_cases/` - JSON test case definitions - - `test_chat_completion.py` - Tests for chat completion APIs diff --git a/tests/verifications/REPORT.md b/tests/verifications/REPORT.md deleted file mode 100644 index 2a700fa9c..000000000 --- a/tests/verifications/REPORT.md +++ /dev/null @@ -1,232 +0,0 @@ -# Test Results Report - -*Generated on: 2025-04-17 12:42:33* - -*This report was generated by running `python tests/verifications/generate_report.py`* - -## Legend - -- ✅ - Test passed -- ❌ - Test failed -- ⚪ - Test not applicable or not run for this model - - -## Summary - -| Provider | Pass Rate | Tests Passed | Total Tests | -| --- | --- | --- | --- | -| Meta_reference | 100.0% | 28 | 28 | -| Together | 50.0% | 40 | 80 | -| Fireworks | 50.0% | 40 | 80 | -| Openai | 100.0% | 56 | 56 | - - - -## Meta_reference - -*Tests run on: 2025-04-17 12:37:11* - -```bash -# Run all tests for this provider: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -v - -# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=meta_reference -k "test_chat_multi_turn_multiple_images and stream=False" -``` - - -**Model Key (Meta_reference)** - -| Display Name | Full Model ID | -| --- | --- | -| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | - - -| Test | Llama-4-Scout-Instruct | -| --- | --- | -| test_chat_multi_turn_multiple_images (stream=False) | ✅ | -| test_chat_multi_turn_multiple_images (stream=True) | ✅ | -| test_chat_non_streaming_basic (earth) | ✅ | -| test_chat_non_streaming_basic (saturn) | ✅ | -| test_chat_non_streaming_image | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | -| test_chat_non_streaming_structured_output (calendar) | ✅ | -| test_chat_non_streaming_structured_output (math) | ✅ | -| test_chat_non_streaming_tool_calling | ✅ | -| test_chat_non_streaming_tool_choice_none | ✅ | -| test_chat_non_streaming_tool_choice_required | ✅ | -| test_chat_streaming_basic (earth) | ✅ | -| test_chat_streaming_basic (saturn) | ✅ | -| test_chat_streaming_image | ✅ | -| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | -| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | -| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | -| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | -| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | -| test_chat_streaming_structured_output (calendar) | ✅ | -| test_chat_streaming_structured_output (math) | ✅ | -| test_chat_streaming_tool_calling | ✅ | -| test_chat_streaming_tool_choice_none | ✅ | -| test_chat_streaming_tool_choice_required | ✅ | - -## Together - -*Tests run on: 2025-04-17 12:27:45* - -```bash -# Run all tests for this provider: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -v - -# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=together -k "test_chat_multi_turn_multiple_images and stream=False" -``` - - -**Model Key (Together)** - -| Display Name | Full Model ID | -| --- | --- | -| Llama-3.3-70B-Instruct | `meta-llama/Llama-3.3-70B-Instruct-Turbo` | -| Llama-4-Maverick-Instruct | `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8` | -| Llama-4-Scout-Instruct | `meta-llama/Llama-4-Scout-17B-16E-Instruct` | - - -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | -| --- | --- | --- | --- | -| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ | -| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ❌ | ❌ | -| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ❌ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_choice_none | ❌ | ❌ | ❌ | -| test_chat_non_streaming_tool_choice_required | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (earth) | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (saturn) | ✅ | ❌ | ❌ | -| test_chat_streaming_image | ⚪ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | -| test_chat_streaming_structured_output (calendar) | ✅ | ❌ | ❌ | -| test_chat_streaming_structured_output (math) | ✅ | ❌ | ❌ | -| test_chat_streaming_tool_calling | ✅ | ❌ | ❌ | -| test_chat_streaming_tool_choice_none | ❌ | ❌ | ❌ | -| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ | - -## Fireworks - -*Tests run on: 2025-04-17 12:29:53* - -```bash -# Run all tests for this provider: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -v - -# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=fireworks -k "test_chat_multi_turn_multiple_images and stream=False" -``` - - -**Model Key (Fireworks)** - -| Display Name | Full Model ID | -| --- | --- | -| Llama-3.3-70B-Instruct | `accounts/fireworks/models/llama-v3p3-70b-instruct` | -| Llama-4-Maverick-Instruct | `accounts/fireworks/models/llama4-maverick-instruct-basic` | -| Llama-4-Scout-Instruct | `accounts/fireworks/models/llama4-scout-instruct-basic` | - - -| Test | Llama-3.3-70B-Instruct | Llama-4-Maverick-Instruct | Llama-4-Scout-Instruct | -| --- | --- | --- | --- | -| test_chat_multi_turn_multiple_images (stream=False) | ⚪ | ✅ | ✅ | -| test_chat_multi_turn_multiple_images (stream=True) | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_basic (earth) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_image | ⚪ | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | -| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_calling | ❌ | ❌ | ❌ | -| test_chat_non_streaming_tool_choice_none | ✅ | ✅ | ✅ | -| test_chat_non_streaming_tool_choice_required | ✅ | ❌ | ❌ | -| test_chat_streaming_basic (earth) | ✅ | ✅ | ✅ | -| test_chat_streaming_basic (saturn) | ✅ | ✅ | ✅ | -| test_chat_streaming_image | ⚪ | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ❌ | ❌ | ❌ | -| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ❌ | ❌ | ❌ | -| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | ✅ | -| test_chat_streaming_structured_output (math) | ✅ | ✅ | ✅ | -| test_chat_streaming_tool_calling | ❌ | ❌ | ❌ | -| test_chat_streaming_tool_choice_none | ✅ | ✅ | ✅ | -| test_chat_streaming_tool_choice_required | ✅ | ❌ | ❌ | - -## Openai - -*Tests run on: 2025-04-17 12:34:08* - -```bash -# Run all tests for this provider: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -v - -# Example: Run only the 'stream=False' case of test_chat_multi_turn_multiple_images: -pytest tests/verifications/openai_api/test_chat_completion.py --provider=openai -k "test_chat_multi_turn_multiple_images and stream=False" -``` - - -**Model Key (Openai)** - -| Display Name | Full Model ID | -| --- | --- | -| gpt-4o | `gpt-4o` | -| gpt-4o-mini | `gpt-4o-mini` | - - -| Test | gpt-4o | gpt-4o-mini | -| --- | --- | --- | -| test_chat_multi_turn_multiple_images (stream=False) | ✅ | ✅ | -| test_chat_multi_turn_multiple_images (stream=True) | ✅ | ✅ | -| test_chat_non_streaming_basic (earth) | ✅ | ✅ | -| test_chat_non_streaming_basic (saturn) | ✅ | ✅ | -| test_chat_non_streaming_image | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ | -| test_chat_non_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (calendar) | ✅ | ✅ | -| test_chat_non_streaming_structured_output (math) | ✅ | ✅ | -| test_chat_non_streaming_tool_calling | ✅ | ✅ | -| test_chat_non_streaming_tool_choice_none | ✅ | ✅ | -| test_chat_non_streaming_tool_choice_required | ✅ | ✅ | -| test_chat_streaming_basic (earth) | ✅ | ✅ | -| test_chat_streaming_basic (saturn) | ✅ | ✅ | -| test_chat_streaming_image | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (add_product_tool) | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (compare_monthly_expense_tool) | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (get_then_create_event_tool) | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (text_then_weather_tool) | ✅ | ✅ | -| test_chat_streaming_multi_turn_tool_calling (weather_tool_then_text) | ✅ | ✅ | -| test_chat_streaming_structured_output (calendar) | ✅ | ✅ | -| test_chat_streaming_structured_output (math) | ✅ | ✅ | -| test_chat_streaming_tool_calling | ✅ | ✅ | -| test_chat_streaming_tool_choice_none | ✅ | ✅ | -| test_chat_streaming_tool_choice_required | ✅ | ✅ | diff --git a/tests/verifications/conf/cerebras.yaml b/tests/verifications/conf/cerebras.yaml deleted file mode 100644 index 37fc713d6..000000000 --- a/tests/verifications/conf/cerebras.yaml +++ /dev/null @@ -1,11 +0,0 @@ -base_url: https://api.cerebras.ai/v1 -api_key_var: CEREBRAS_API_KEY -models: -- llama-3.3-70b -model_display_names: - llama-3.3-70b: Llama-3.3-70B-Instruct -test_exclusions: - llama-3.3-70b: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/fireworks-llama-stack.yaml b/tests/verifications/conf/fireworks-llama-stack.yaml deleted file mode 100644 index dffd7c739..000000000 --- a/tests/verifications/conf/fireworks-llama-stack.yaml +++ /dev/null @@ -1,17 +0,0 @@ -base_url: http://localhost:8321/v1/openai/v1 -api_key_var: FIREWORKS_API_KEY -models: -- fireworks/llama-v3p3-70b-instruct -- fireworks/llama4-scout-instruct-basic -- fireworks/llama4-maverick-instruct-basic -model_display_names: - fireworks/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct - fireworks/llama4-scout-instruct-basic: Llama-4-Scout-Instruct - fireworks/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct -test_exclusions: - fireworks/llama-v3p3-70b-instruct: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images - - test_response_non_streaming_image - - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/conf/fireworks.yaml b/tests/verifications/conf/fireworks.yaml deleted file mode 100644 index 9bb21f706..000000000 --- a/tests/verifications/conf/fireworks.yaml +++ /dev/null @@ -1,15 +0,0 @@ -base_url: https://api.fireworks.ai/inference/v1 -api_key_var: FIREWORKS_API_KEY -models: -- accounts/fireworks/models/llama-v3p3-70b-instruct -- accounts/fireworks/models/llama4-scout-instruct-basic -- accounts/fireworks/models/llama4-maverick-instruct-basic -model_display_names: - accounts/fireworks/models/llama-v3p3-70b-instruct: Llama-3.3-70B-Instruct - accounts/fireworks/models/llama4-scout-instruct-basic: Llama-4-Scout-Instruct - accounts/fireworks/models/llama4-maverick-instruct-basic: Llama-4-Maverick-Instruct -test_exclusions: - accounts/fireworks/models/llama-v3p3-70b-instruct: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/groq-llama-stack.yaml b/tests/verifications/conf/groq-llama-stack.yaml deleted file mode 100644 index 786b79c24..000000000 --- a/tests/verifications/conf/groq-llama-stack.yaml +++ /dev/null @@ -1,17 +0,0 @@ -base_url: http://localhost:8321/v1/openai/v1 -api_key_var: GROQ_API_KEY -models: -- groq/llama-3.3-70b-versatile -- groq/llama-4-scout-17b-16e-instruct -- groq/llama-4-maverick-17b-128e-instruct -model_display_names: - groq/llama-3.3-70b-versatile: Llama-3.3-70B-Instruct - groq/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct - groq/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct -test_exclusions: - groq/llama-3.3-70b-versatile: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images - - test_response_non_streaming_image - - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/conf/groq.yaml b/tests/verifications/conf/groq.yaml deleted file mode 100644 index bc3de58e9..000000000 --- a/tests/verifications/conf/groq.yaml +++ /dev/null @@ -1,15 +0,0 @@ -base_url: https://api.groq.com/openai/v1 -api_key_var: GROQ_API_KEY -models: -- llama-3.3-70b-versatile -- meta-llama/llama-4-scout-17b-16e-instruct -- meta-llama/llama-4-maverick-17b-128e-instruct -model_display_names: - llama-3.3-70b-versatile: Llama-3.3-70B-Instruct - meta-llama/llama-4-scout-17b-16e-instruct: Llama-4-Scout-Instruct - meta-llama/llama-4-maverick-17b-128e-instruct: Llama-4-Maverick-Instruct -test_exclusions: - llama-3.3-70b-versatile: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conf/meta_reference.yaml b/tests/verifications/conf/meta_reference.yaml deleted file mode 100644 index fb2680fe0..000000000 --- a/tests/verifications/conf/meta_reference.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# LLAMA_STACK_PORT=5002 llama stack run meta-reference-gpu --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct --env INFERENCE_CHECKPOINT_DIR= -base_url: http://localhost:5002/v1/openai/v1 -api_key_var: foo -models: -- meta-llama/Llama-4-Scout-17B-16E-Instruct -model_display_names: - meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct -test_exclusions: {} diff --git a/tests/verifications/conf/openai-llama-stack.yaml b/tests/verifications/conf/openai-llama-stack.yaml deleted file mode 100644 index de35439ae..000000000 --- a/tests/verifications/conf/openai-llama-stack.yaml +++ /dev/null @@ -1,9 +0,0 @@ -base_url: http://localhost:8321/v1/openai/v1 -api_key_var: OPENAI_API_KEY -models: -- openai/gpt-4o -- openai/gpt-4o-mini -model_display_names: - openai/gpt-4o: gpt-4o - openai/gpt-4o-mini: gpt-4o-mini -test_exclusions: {} diff --git a/tests/verifications/conf/openai.yaml b/tests/verifications/conf/openai.yaml deleted file mode 100644 index 95a6259f7..000000000 --- a/tests/verifications/conf/openai.yaml +++ /dev/null @@ -1,9 +0,0 @@ -base_url: https://api.openai.com/v1 -api_key_var: OPENAI_API_KEY -models: -- gpt-4o -- gpt-4o-mini -model_display_names: - gpt-4o: gpt-4o - gpt-4o-mini: gpt-4o-mini -test_exclusions: {} diff --git a/tests/verifications/conf/together-llama-stack.yaml b/tests/verifications/conf/together-llama-stack.yaml deleted file mode 100644 index 58cbcfa93..000000000 --- a/tests/verifications/conf/together-llama-stack.yaml +++ /dev/null @@ -1,17 +0,0 @@ -base_url: http://localhost:8321/v1/openai/v1 -api_key_var: TOGETHER_API_KEY -models: -- together/meta-llama/Llama-3.3-70B-Instruct-Turbo -- together/meta-llama/Llama-4-Scout-17B-16E-Instruct -- together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -model_display_names: - together/meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct - together/meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct - together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct -test_exclusions: - together/meta-llama/Llama-3.3-70B-Instruct-Turbo: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images - - test_response_non_streaming_image - - test_response_non_streaming_multi_turn_image diff --git a/tests/verifications/conf/together.yaml b/tests/verifications/conf/together.yaml deleted file mode 100644 index e8fb62ab9..000000000 --- a/tests/verifications/conf/together.yaml +++ /dev/null @@ -1,15 +0,0 @@ -base_url: https://api.together.xyz/v1 -api_key_var: TOGETHER_API_KEY -models: -- meta-llama/Llama-3.3-70B-Instruct-Turbo -- meta-llama/Llama-4-Scout-17B-16E-Instruct -- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 -model_display_names: - meta-llama/Llama-3.3-70B-Instruct-Turbo: Llama-3.3-70B-Instruct - meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct - meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8: Llama-4-Maverick-Instruct -test_exclusions: - meta-llama/Llama-3.3-70B-Instruct-Turbo: - - test_chat_non_streaming_image - - test_chat_streaming_image - - test_chat_multi_turn_multiple_images diff --git a/tests/verifications/conftest.py b/tests/verifications/conftest.py deleted file mode 100644 index 030efcde9..000000000 --- a/tests/verifications/conftest.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import re - -import pytest - - -def pytest_addoption(parser): - parser.addoption( - "--base-url", - action="store", - help="Base URL for OpenAI compatible API", - ) - parser.addoption( - "--api-key", - action="store", - help="API key to use for the provider", - ) - parser.addoption( - "--provider", - action="store", - help="Provider to use for testing", - ) - parser.addoption( - "--model", - action="store", - help="Model to use for testing", - ) - - -pytest_plugins = [ - "pytest_jsonreport", - "tests.verifications.openai_api.fixtures.fixtures", - "tests.verifications.openai_api.fixtures.load", -] - - -@pytest.hookimpl(optionalhook=True) -def pytest_json_runtest_metadata(item, call): - """Add model and case_id to pytest-json report metadata.""" - metadata = {} - nodeid = item.nodeid - - # 1. Extract model from callspec if available - model = item.callspec.params.get("model") if hasattr(item, "callspec") else None - if model: - metadata["model"] = model - else: - # Fallback: Try parsing from nodeid (less reliable) - match_model = re.search(r"\[(.*?)-", nodeid) - if match_model: - model = match_model.group(1) # Store model even if found via fallback - metadata["model"] = model - else: - print(f"Warning: Could not determine model for test {nodeid}") - model = None # Ensure model is None if not found - - # 2. Extract case_id using the known model string if possible - if model: - # Construct a regex pattern to find the case_id *after* the model name and a hyphen. - # Escape the model name in case it contains regex special characters. - pattern = re.escape(model) + r"-(.*?)\]$" - match_case = re.search(pattern, nodeid) - if match_case: - case_id = match_case.group(1) - metadata["case_id"] = case_id - else: - # Fallback if the pattern didn't match (e.g., nodeid format unexpected) - # Try the old less specific regex as a last resort. - match_case_fallback = re.search(r"-(.*?)\]$", nodeid) - if match_case_fallback: - case_id = match_case_fallback.group(1) - metadata["case_id"] = case_id - print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid}") - else: - print(f"Warning: Could not parse case_id from nodeid {nodeid} even with fallback.") - if "case" in (item.callspec.params if hasattr(item, "callspec") else {}): - metadata["case_id"] = "parsing_failed" - elif "case" in (item.callspec.params if hasattr(item, "callspec") else {}): - # Cannot reliably parse case_id without model, but we know it's a case test. - # Try the generic fallback regex. - match_case_fallback = re.search(r"-(.*?)\]$", nodeid) - if match_case_fallback: - case_id = match_case_fallback.group(1) - metadata["case_id"] = case_id - print(f"Warning: Used fallback regex to parse case_id from nodeid {nodeid} (model unknown)") - else: - print(f"Warning: Could not parse case_id from nodeid {nodeid} (model unknown)") - metadata["case_id"] = "parsing_failed_no_model" - # else: Not a test with a model or case param we need to handle. - - return metadata diff --git a/tests/verifications/generate_report.py b/tests/verifications/generate_report.py deleted file mode 100755 index 67ef14e90..000000000 --- a/tests/verifications/generate_report.py +++ /dev/null @@ -1,502 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -""" -Test Report Generator - -Description: - This script runs pytest tests (specifically designed for OpenAI API compatibility checks) - for different providers, aggregates the results from JSON reports, and generates - a markdown summary report (REPORT.md). - - It automatically cleans up old test result files, keeping only the latest - per provider. - - -Configuration: - - Provider details (models, display names) are loaded from `tests/verifications/conf/*.yaml`. - - Test cases are defined in YAML files within `tests/verifications/openai_api/fixtures/test_cases/`. - - Test results are stored in `tests/verifications/test_results/`. - -Usage: - # Generate a report using the latest existing test results - python tests/verifications/generate_report.py - - # Run tests for all configured providers and generate a report - python tests/verifications/generate_report.py --run-tests - - # Run tests only for specific providers (space-separated) - python tests/verifications/generate_report.py --run-tests --providers fireworks openai - - # Run tests matching a keyword expression (uses pytest -k) - python tests/verifications/generate_report.py --run-tests --providers fireworks --k "streaming" - - # Run a specific test case for a provider - python tests/verifications/generate_report.py --run-tests --providers fireworks --k "test_chat_streaming_basic and basic_earth" - - # Save the report to a custom location - python tests/verifications/generate_report.py --output custom_report.md -""" - -import argparse -import json -import os -import re -import subprocess -import time -from collections import defaultdict -from pathlib import Path -from typing import Any - -from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs - -# Define the root directory for test results -RESULTS_DIR = Path(__file__).parent / "test_results" -RESULTS_DIR.mkdir(exist_ok=True) - -# Maximum number of test result files to keep per provider -MAX_RESULTS_PER_PROVIDER = 1 - -DEFAULT_PROVIDERS = [ - "meta_reference", - "together", - "fireworks", - "openai", -] - -VERIFICATION_CONFIG = _load_all_verification_configs() - - -def run_tests(provider, keyword=None): - """Run pytest for a specific provider and save results""" - print(f"Running tests for provider: {provider}") - - timestamp = int(time.time()) - # Use a constant filename for the final result and temp file - result_file = RESULTS_DIR / f"{provider}.json" - temp_json_file = RESULTS_DIR / f"temp_{provider}.json" - - # Determine project root directory relative to this script - project_root = Path(__file__).parent.parent.parent - - # Run pytest with JSON output - cmd = [ - "python", - "-m", - "pytest", - "tests/verifications/openai_api/test_chat_completion.py", - f"--provider={provider}", - "-v", - "--json-report", - f"--json-report-file={temp_json_file}", - ] - - # Append -k argument if provided - if keyword: - cmd.extend(["-k", keyword]) - - try: - # Run subprocess with cwd set to project root - result = subprocess.run(cmd, capture_output=True, text=True, cwd=project_root) - print(f"Pytest exit code: {result.returncode}") - - # Check if the JSON file was created - if temp_json_file.exists(): - with open(temp_json_file) as f: - test_results = json.load(f) - - test_results["run_timestamp"] = timestamp - - # Save results to the final (overwritten) file - with open(result_file, "w") as f: - json.dump(test_results, f, indent=2) - f.write("\n") # Add a trailing newline for precommit - - # Clean up temp file - temp_json_file.unlink() - - print(f"Test results saved to {result_file}") - return result_file - else: - print(f"Error: JSON report file not created for {provider}") - print(f"Command stdout: {result.stdout}") - print(f"Command stderr: {result.stderr}") - return None - except Exception as e: - print(f"Error running tests for {provider}: {e}") - return None - - -def run_multiple_tests(providers_to_run: list[str], keyword: str | None): - """Runs tests for a list of providers.""" - print(f"Running tests for providers: {', '.join(providers_to_run)}") - for provider in providers_to_run: - run_tests(provider.strip(), keyword=keyword) - print("Finished running tests.") - - -def parse_results( - result_file, -) -> tuple[defaultdict[str, defaultdict[str, dict[str, bool]]], defaultdict[str, set[str]], set[str], str]: - """Parse a single test results file. - - Returns: - Tuple containing: - - parsed_results: DefaultDict[provider, DefaultDict[model, Dict[test_name, pass_status]]] - - providers_in_file: DefaultDict[provider, Set[model]] found in this file. - - tests_in_file: Set[test_name] found in this file. - - run_timestamp: Timestamp when the test was run - """ - if not os.path.exists(result_file): - print(f"Results file does not exist: {result_file}") - # Return empty defaultdicts/set matching the type hint - return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), "" - - with open(result_file) as f: - results = json.load(f) - - # Initialize results dictionary with specific types - parsed_results: defaultdict[str, defaultdict[str, dict[str, bool]]] = defaultdict(lambda: defaultdict(dict)) - providers_in_file: defaultdict[str, set[str]] = defaultdict(set) - tests_in_file: set[str] = set() - # Extract provider from filename (e.g., "openai.json" -> "openai") - provider: str = result_file.stem - - # Extract run timestamp from the JSON data - run_timestamp_unix = results.get("run_timestamp") - run_timestamp_str = ( - time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(run_timestamp_unix)) - if run_timestamp_unix is not None - else "Unknown" - ) - - # Debug: Print summary of test results - print(f"Test results summary for {provider}:") - print(f"Total tests: {results.get('summary', {}).get('total', 0)}") - print(f"Passed: {results.get('summary', {}).get('passed', 0)}") - print(f"Failed: {results.get('summary', {}).get('failed', 0)}") - print(f"Error: {results.get('summary', {}).get('error', 0)}") - print(f"Skipped: {results.get('summary', {}).get('skipped', 0)}") - - # Extract test results - if "tests" not in results or not results["tests"]: - print(f"No test results found in {result_file}") - # Return empty defaultdicts/set matching the type hint - return defaultdict(lambda: defaultdict(dict)), defaultdict(set), set(), "" - - # Process the tests - for test in results["tests"]: - test_id = test.get("nodeid", "") - - if not (call_phase := test.get("call")): - continue - call_outcome = call_phase.get("outcome") - if call_outcome not in ("passed", "failed"): - continue - - # --- Extract data from metadata --- - metadata = test.get("metadata", {}) - model = metadata.get("model") - case_id = metadata.get("case_id") # String ID (if provided) - case_index = metadata.get("case_index") # Integer index (if no ID provided) - - # Check if we have a model and at least one case identifier - if not model or (case_id is None and case_index is None): - print( - f"Warning: Missing 'model' or case identifier ('case_id'/'case_index') metadata for test: {test_id}. Skipping." - ) - continue - - try: - test_name_base = test_id.split("::")[1].split("[")[0] - except (IndexError, ValueError) as e: - print(f"Warning: Could not parse base test name for {test_id}. Error: {e}. Skipping.") - continue - - # Construct detailed test name using ID or index - if case_id is not None: - detailed_test_name = f"{test_name_base} ({case_id})" - elif case_index == 0: - # If case_id is missing and index is 0, assume single case, use base name only - detailed_test_name = test_name_base - elif case_index is not None: # case_index > 0 - # Use case_index for naming if case_id wasn't provided and index > 0 - detailed_test_name = f"{test_name_base} (case{case_index})" - else: - # This case should be prevented by the earlier check, but handle defensively - print(f"Error: No case identifier found for test {test_id} after initial check. Skipping.") - continue - - # Populate collections for this file - tests_in_file.add(detailed_test_name) - providers_in_file[provider].add(model) - - if call_outcome == "passed": - parsed_results[provider][model][detailed_test_name] = True - elif call_outcome == "failed": - parsed_results[provider][model][detailed_test_name] = False - - # Final Summary Warning (Optional) - if not parsed_results.get(provider): - print(f"Warning: No valid test results parsed for provider {provider} from file {result_file}") - - return parsed_results, providers_in_file, tests_in_file, run_timestamp_str - - -def generate_report( - results_dict: dict[str, Any], - providers: dict[str, set[str]], - all_tests: set[str], - provider_timestamps: dict[str, str], - output_file=None, -): - """Generate the markdown report. - - Args: - results_dict: Aggregated results [provider][model][test_name] -> status. - providers: Dict of all providers and their models {provider: {models}}. - The order of keys in this dict determines the report order. - all_tests: Set of all test names found. - provider_timestamps: Dict of provider to timestamp when tests were run - output_file: Optional path to save the report. - """ - if output_file is None: - # Default to creating the report in the same directory as this script - output_file = Path(__file__).parent / "REPORT.md" - else: - output_file = Path(output_file) - - # Convert provider model sets to sorted lists (use passed-in providers dict) - providers_sorted = {prov: sorted(models) for prov, models in providers.items()} - - # Sort tests alphabetically (use passed-in all_tests set) - sorted_tests = sorted(all_tests) - - # Calculate counts for each base test name - base_test_case_counts: defaultdict[str, int] = defaultdict(int) - base_test_name_map: dict[str, str] = {} - for test_name in sorted_tests: - match = re.match(r"^(.*?)( \([^)]+\))?$", test_name) - if match: - base_name = match.group(1).strip() - base_test_case_counts[base_name] += 1 - base_test_name_map[test_name] = base_name - else: - # Should not happen with current naming, but handle defensively - base_test_case_counts[test_name] += 1 - base_test_name_map[test_name] = test_name - - if not sorted_tests: - print("Warning: No test results found to generate a report.") - # Optionally create an empty report or return early - with open(output_file, "w") as f: - f.write("# Test Results Report\n\nNo test results found.\n") - print(f"Generated empty report: {output_file}") - return - - report = ["# Test Results Report\n"] - report.append(f"*Generated on: {time.strftime('%Y-%m-%d %H:%M:%S')}*\n") - report.append("*This report was generated by running `python tests/verifications/generate_report.py`*\n") - - # Icons for pass/fail - pass_icon = "✅" - fail_icon = "❌" - na_icon = "⚪" - - # Add emoji legend - report.append("## Legend\n") - report.append(f"- {pass_icon} - Test passed") - report.append(f"- {fail_icon} - Test failed") - report.append(f"- {na_icon} - Test not applicable or not run for this model") - report.append("\n") - - # Add a summary section - report.append("## Summary\n") - - # Count total tests and passes (use passed-in providers and all_tests) - total_tests = 0 - passed_tests = 0 - provider_totals = {} - for provider, models in providers_sorted.items(): - provider_passed = 0 - provider_total = 0 - if provider in results_dict: - for model in models: - if model in results_dict[provider]: - model_results = results_dict[provider][model] - for test in sorted_tests: - if test in model_results: - provider_total += 1 - total_tests += 1 - if model_results[test]: - provider_passed += 1 - passed_tests += 1 - provider_totals[provider] = (provider_passed, provider_total) - - # Add summary table (use the order from the providers dict keys) - report.append("| Provider | Pass Rate | Tests Passed | Total Tests |") - report.append("| --- | --- | --- | --- |") - # Iterate through providers in the order they appear in the input dict - for provider in providers_sorted.keys(): - passed, total = provider_totals.get(provider, (0, 0)) - pass_rate = f"{(passed / total * 100):.1f}%" if total > 0 else "N/A" - report.append(f"| {provider.capitalize()} | {pass_rate} | {passed} | {total} |") - report.append("\n") - - for provider in providers_sorted.keys(): - provider_models = providers_sorted[provider] # Use sorted models - if not provider_models: - continue - - report.append(f"\n## {provider.capitalize()}\n") - - # Add timestamp when test was run - if provider in provider_timestamps: - report.append(f"*Tests run on: {provider_timestamps[provider]}*\n") - - # Add test command for reproducing results - test_cmd_all = f"pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -v" - report.append(f"```bash\n# Run all tests for this provider:\n{test_cmd_all}\n") - - # Find an example test with a case ID - example_base_test_name = None - example_case_id = None - # Get first test as fallback base, handle empty list - first_test_name = sorted_tests[0] if sorted_tests else "unknown_test" - - match = re.match(r"^(.*?) \((.*?)\)$", first_test_name) - if match: - example_base_test_name = match.group(1).strip() - example_case_id = match.group(2).strip() - else: - example_base_test_name = first_test_name - - base_name = base_test_name_map.get(first_test_name, first_test_name) # Get base name - case_count = base_test_case_counts.get(base_name, 1) # Get count - filter_str = f"{example_base_test_name} and {example_case_id}" if case_count > 1 else example_base_test_name - - test_cmd_specific_case = ( - f'pytest tests/verifications/openai_api/test_chat_completion.py --provider={provider} -k "{filter_str}"' - ) - report.append( - f"# Example: Run only the '{example_case_id}' case of {example_base_test_name}:\n{test_cmd_specific_case}\n```\n" - ) - - # Get display names (use passed-in providers dict) - provider_config = VERIFICATION_CONFIG.get("providers", {}).get(provider, {}) - display_name_map = provider_config.get("model_display_names", {}) - - # Add Model Key Table (use provider_models) - report.append(f"\n**Model Key ({provider.capitalize()})**\n") - provider_key_lines = ["| Display Name | Full Model ID |", "| --- | --- |"] - for model_id in provider_models: - display_name = display_name_map.get(model_id, model_id) - provider_key_lines.append(f"| {display_name} | `{model_id}` |") - report.extend(provider_key_lines) - report.append("\n") - - # Create results table header (use provider_models) - display_names = [display_name_map.get(m, m) for m in provider_models] - header = "| Test | " + " | ".join(display_names) + " |" - separator = "| --- | " + " | ".join(["---"] * len(provider_models)) + " |" - report.append(header) - report.append(separator) - - # Get results for this provider from results_dict - provider_results_data = results_dict.get(provider, {}) - - # Add rows for each test (use sorted_tests) - for test in sorted_tests: - # Determine display name based on case count - base_name = base_test_name_map.get(test, test) # Get base name - case_count = base_test_case_counts.get(base_name, 1) # Get count - display_test_name = base_name if case_count == 1 else test # Choose display name - row = f"| {display_test_name} |" # Use display name - - for model_id in provider_models: - if model_id in provider_results_data and test in provider_results_data[model_id]: - result = pass_icon if provider_results_data[model_id][test] else fail_icon - else: - result = na_icon - row += f" {result} |" - report.append(row) - - # Write to file - with open(output_file, "w") as f: - f.write("\n".join(report)) - f.write("\n") - - print(f"Report generated: {output_file}") - - -def main(): - parser = argparse.ArgumentParser(description="Generate test report") - parser.add_argument("--run-tests", action="store_true", help="Run tests before generating report") - parser.add_argument( - "--providers", - type=str, - nargs="+", - help="Specify providers to include/test (comma-separated or space-separated, default: uses DEFAULT_PROVIDERS)", - ) - parser.add_argument("--output", type=str, help="Output file location (default: tests/verifications/REPORT.md)") - parser.add_argument("--k", type=str, help="Keyword expression to filter tests (passed to pytest -k)") - args = parser.parse_args() - - all_results = {} - final_providers_order = {} # Dictionary to store results, preserving processing order - aggregated_tests = set() - provider_timestamps = {} - - # 1. Determine the desired list and order of providers - if args.providers: - desired_providers = [] - for provider_arg in args.providers: - desired_providers.extend([p.strip() for p in provider_arg.split(",")]) - else: - desired_providers = DEFAULT_PROVIDERS # Use default order/list - - # 2. Run tests if requested (using the desired provider list) - if args.run_tests: - run_multiple_tests(desired_providers, args.k) - - for provider in desired_providers: - # Construct the expected result file path directly - result_file = RESULTS_DIR / f"{provider}.json" - - if result_file.exists(): # Check if the specific file exists - print(f"Loading results for {provider} from {result_file}") - try: - parsed_data = parse_results(result_file) - parsed_results, providers_in_file, tests_in_file, run_timestamp = parsed_data - all_results.update(parsed_results) - aggregated_tests.update(tests_in_file) - - # Add models for this provider, ensuring it's added in the correct report order - if provider in providers_in_file: - if provider not in final_providers_order: - final_providers_order[provider] = set() - final_providers_order[provider].update(providers_in_file[provider]) - if run_timestamp != "Unknown": - provider_timestamps[provider] = run_timestamp - else: - print( - f"Warning: Provider '{provider}' found in desired list but not within its result file data ({result_file})." - ) - - except Exception as e: - print(f"Error parsing results for provider {provider} from {result_file}: {e}") - else: - # Only print warning if we expected results (i.e., provider was in the desired list) - print(f"Result file for desired provider '{provider}' not found at {result_file}. Skipping.") - - # 5. Generate the report using the filtered & ordered results - print(f"Final Provider Order for Report: {list(final_providers_order.keys())}") - generate_report(all_results, final_providers_order, aggregated_tests, provider_timestamps, args.output) - - -if __name__ == "__main__": - main() diff --git a/tests/verifications/openai-api-verification-run.yaml b/tests/verifications/openai-api-verification-run.yaml deleted file mode 100644 index d6d8cd07d..000000000 --- a/tests/verifications/openai-api-verification-run.yaml +++ /dev/null @@ -1,162 +0,0 @@ -# This is a temporary run file because model names used by the verification tests -# are not quite consistent with various pre-existing distributions. -# -version: '2' -image_name: openai-api-verification -apis: -- agents -- inference -- telemetry -- tool_runtime -- vector_io -- safety -providers: - inference: - - provider_id: together - provider_type: remote::together - config: - url: https://api.together.xyz/v1 - api_key: ${env.TOGETHER_API_KEY:} - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: https://api.fireworks.ai/inference/v1 - api_key: ${env.FIREWORKS_API_KEY:} - - provider_id: groq - provider_type: remote::groq - config: - url: https://api.groq.com - api_key: ${env.GROQ_API_KEY:} - - provider_id: openai - provider_type: remote::openai - config: - url: https://api.openai.com/v1 - api_key: ${env.OPENAI_API_KEY:} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/faiss_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:\u200B}" - sinks: ${env.TELEMETRY_SINKS:console,sqlite} - sqlite_db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai-api-verification}/trace_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/agents_store.db - responses_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/responses_store.db - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: wolfram-alpha - provider_type: remote::wolfram-alpha - config: - api_key: ${env.WOLFRAM_ALPHA_API_KEY:} -metadata_store: - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/openai}/registry.db -models: -- metadata: {} - model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo - provider_id: together - provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct - provider_id: together - provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct - model_type: llm -- metadata: {} - model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - provider_id: together - provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 - model_type: llm -- metadata: {} - model_id: fireworks/llama-v3p3-70b-instruct - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct - model_type: llm -- metadata: {} - model_id: fireworks/llama4-scout-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic - model_type: llm -- metadata: {} - model_id: fireworks/llama4-maverick-instruct-basic - provider_id: fireworks - provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic - model_type: llm -- metadata: {} - model_id: groq/llama-3.3-70b-versatile - provider_id: groq - provider_model_id: groq/llama-3.3-70b-versatile - model_type: llm -- metadata: {} - model_id: groq/llama-4-scout-17b-16e-instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct - model_type: llm -- metadata: {} - model_id: groq/llama-4-maverick-17b-128e-instruct - provider_id: groq - provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct - model_type: llm -- metadata: {} - model_id: openai/gpt-4o - provider_id: openai - provider_model_id: openai/gpt-4o - model_type: llm -- metadata: {} - model_id: openai/gpt-4o-mini - provider_id: openai - provider_model_id: openai/gpt-4o-mini - model_type: llm -shields: [] -vector_dbs: [] -datasets: [] -scoring_fns: [] -benchmarks: [] -tool_groups: -- toolgroup_id: builtin::websearch - provider_id: tavily-search -- toolgroup_id: builtin::rag - provider_id: rag-runtime -- toolgroup_id: builtin::wolfram_alpha - provider_id: wolfram-alpha -server: - port: 8321 diff --git a/tests/verifications/openai_api/__init__.py b/tests/verifications/openai_api/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/tests/verifications/openai_api/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/tests/verifications/openai_api/conftest.py b/tests/verifications/openai_api/conftest.py deleted file mode 100644 index 9d773b8de..000000000 --- a/tests/verifications/openai_api/conftest.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from tests.verifications.openai_api.fixtures.fixtures import _load_all_verification_configs - - -def pytest_generate_tests(metafunc): - """Dynamically parametrize tests based on the selected provider and config.""" - if "model" in metafunc.fixturenames: - model = metafunc.config.getoption("model") - if model: - metafunc.parametrize("model", [model]) - return - - provider = metafunc.config.getoption("provider") - if not provider: - print("Warning: --provider not specified. Skipping model parametrization.") - metafunc.parametrize("model", []) - return - - try: - config_data = _load_all_verification_configs() - except (OSError, FileNotFoundError) as e: - print(f"ERROR loading verification configs: {e}") - config_data = {"providers": {}} - - provider_config = config_data.get("providers", {}).get(provider) - if provider_config: - models = provider_config.get("models", []) - if models: - metafunc.parametrize("model", models) - else: - print(f"Warning: No models found for provider '{provider}' in config.") - metafunc.parametrize("model", []) # Parametrize empty if no models found - else: - print(f"Warning: Provider '{provider}' not found in config. No models parametrized.") - metafunc.parametrize("model", []) # Parametrize empty if provider not found diff --git a/tests/verifications/openai_api/fixtures/__init__.py b/tests/verifications/openai_api/fixtures/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/tests/verifications/openai_api/fixtures/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/tests/verifications/openai_api/test_chat_completion.py b/tests/verifications/openai_api/test_chat_completion.py deleted file mode 100644 index 64e49d352..000000000 --- a/tests/verifications/openai_api/test_chat_completion.py +++ /dev/null @@ -1,717 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import base64 -import copy -import json -from pathlib import Path -from typing import Any - -import pytest -from openai import APIError -from pydantic import BaseModel - -from tests.verifications.openai_api.fixtures.fixtures import ( - case_id_generator, - get_base_test_name, - should_skip_test, -) -from tests.verifications.openai_api.fixtures.load import load_test_cases - -chat_completion_test_cases = load_test_cases("chat_completion") - -THIS_DIR = Path(__file__).parent - - -@pytest.fixture -def multi_image_data(): - files = [ - THIS_DIR / "fixtures/images/vision_test_1.jpg", - THIS_DIR / "fixtures/images/vision_test_2.jpg", - THIS_DIR / "fixtures/images/vision_test_3.jpg", - ] - encoded_files = [] - for file in files: - with open(file, "rb") as image_file: - base64_data = base64.b64encode(image_file.read()).decode("utf-8") - encoded_files.append(f"data:image/jpeg;base64,{base64_data}") - return encoded_files - - -# --- Test Functions --- - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_non_streaming_basic(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert case["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_basic"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert case["output"].lower() in content.lower() - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_input_validation"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_non_streaming_error_handling(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - with pytest.raises(APIError) as e: - openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=False, - tool_choice=case["input"]["tool_choice"] if "tool_choice" in case["input"] else None, - tools=case["input"]["tools"] if "tools" in case["input"] else None, - ) - assert case["output"]["error"]["status_code"] == e.value.status_code - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_input_validation"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_streaming_error_handling(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - with pytest.raises(APIError) as e: - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=True, - tool_choice=case["input"]["tool_choice"] if "tool_choice" in case["input"] else None, - tools=case["input"]["tools"] if "tools" in case["input"] else None, - ) - for _chunk in response: - pass - assert str(case["output"]["error"]["status_code"]) in e.value.message - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_image"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_non_streaming_image(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=False, - ) - assert response.choices[0].message.role == "assistant" - assert case["output"].lower() in response.choices[0].message.content.lower() - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_image"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - stream=True, - ) - content = "" - for chunk in response: - content += chunk.choices[0].delta.content or "" - - # TODO: add detailed type validation - - assert case["output"].lower() in content.lower() - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_non_streaming_structured_output(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - response_format=case["input"]["response_format"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - maybe_json_content = response.choices[0].message.content - - validate_structured_output(maybe_json_content, case["output"]) - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_chat_structured_output"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - response_format=case["input"]["response_format"], - stream=True, - ) - maybe_json_content = "" - for chunk in response: - maybe_json_content += chunk.choices[0].delta.content or "" - validate_structured_output(maybe_json_content, case["output"]) - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - assert len(response.choices[0].message.tool_calls) > 0 - assert case["output"] == "get_weather_tool_call" - assert response.choices[0].message.tool_calls[0].function.name == "get_weather" - # TODO: add detailed type validation - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], - ids=case_id_generator, -) -def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - stream = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - stream=True, - ) - - _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream) - assert len(tool_calls_buffer) == 1 - for call in tool_calls_buffer: - assert len(call["id"]) > 0 - function = call["function"] - assert function["name"] == "get_weather" - - args_dict = json.loads(function["arguments"]) - assert "san francisco" in args_dict["location"].lower() - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now - ids=case_id_generator, -) -def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - tool_choice="required", # Force tool call - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - assert len(response.choices[0].message.tool_calls) > 0, "Expected tool call when tool_choice='required'" - expected_tool_name = case["input"]["tools"][0]["function"]["name"] - assert response.choices[0].message.tool_calls[0].function.name == expected_tool_name - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now - ids=case_id_generator, -) -def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - stream = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - tool_choice="required", # Force tool call - stream=True, - ) - - _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream) - - assert len(tool_calls_buffer) > 0, "Expected tool call when tool_choice='required'" - expected_tool_name = case["input"]["tools"][0]["function"]["name"] - assert any(call["function"]["name"] == expected_tool_name for call in tool_calls_buffer), ( - f"Expected tool call '{expected_tool_name}' not found in stream" - ) - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now - ids=case_id_generator, -) -def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - response = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - tool_choice="none", - stream=False, - ) - - assert response.choices[0].message.role == "assistant" - assert response.choices[0].message.tool_calls is None, "Expected no tool calls when tool_choice='none'" - assert response.choices[0].message.content is not None, "Expected content when tool_choice='none'" - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases["test_tool_calling"]["test_params"]["case"], # Reusing existing case for now - ids=case_id_generator, -) -def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - stream = openai_client.chat.completions.create( - model=model, - messages=case["input"]["messages"], - tools=case["input"]["tools"], - tool_choice="none", - stream=True, - ) - - content = "" - for chunk in stream: - delta = chunk.choices[0].delta - if delta.content: - content += delta.content - assert not delta.tool_calls, "Expected no tool call chunks when tool_choice='none'" - - assert len(content) > 0, "Expected content when tool_choice='none'" - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases.get("test_chat_multi_turn_tool_calling", {}).get("test_params", {}).get("case", []), - ids=case_id_generator, -) -def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case): - """ - Test cases for multi-turn tool calling. - Tool calls are asserted. - Tool responses are provided in the test case. - Final response is asserted. - """ - - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - # Create a copy of the messages list to avoid modifying the original - messages = [] - tools = case["input"]["tools"] - # Use deepcopy to prevent modification across runs/parametrization - expected_results = copy.deepcopy(case["expected"]) - tool_responses = copy.deepcopy(case.get("tool_responses", [])) - input_messages_turns = copy.deepcopy(case["input"]["messages"]) - - # keep going until either - # 1. we have messages to test in multi-turn - # 2. no messages but last message is tool response - while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"): - # do not take new messages if last message is tool response - if len(messages) == 0 or messages[-1]["role"] != "tool": - new_messages = input_messages_turns.pop(0) - # Ensure new_messages is a list of message objects - if isinstance(new_messages, list): - messages.extend(new_messages) - else: - # If it's a single message object, add it directly - messages.append(new_messages) - - # --- API Call --- - response = openai_client.chat.completions.create( - model=model, - messages=messages, - tools=tools, - stream=False, - ) - - # --- Process Response --- - assistant_message = response.choices[0].message - messages.append(assistant_message.model_dump(exclude_unset=True)) - - assert assistant_message.role == "assistant" - - # Get the expected result data - expected = expected_results.pop(0) - num_tool_calls = expected["num_tool_calls"] - - # --- Assertions based on expected result --- - assert len(assistant_message.tool_calls or []) == num_tool_calls, ( - f"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}" - ) - - if num_tool_calls > 0: - tool_call = assistant_message.tool_calls[0] - assert tool_call.function.name == expected["tool_name"], ( - f"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'" - ) - # Parse the JSON string arguments before comparing - actual_arguments = json.loads(tool_call.function.arguments) - assert actual_arguments == expected["tool_arguments"], ( - f"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'" - ) - - # Prepare and append the tool response for the next turn - tool_response = tool_responses.pop(0) - messages.append( - { - "role": "tool", - "tool_call_id": tool_call.id, - "content": tool_response["response"], - } - ) - else: - assert assistant_message.content is not None, "Expected content, but none received." - expected_answers = expected["answer"] # This is now a list - content_lower = assistant_message.content.lower() - assert any(ans.lower() in content_lower for ans in expected_answers), ( - f"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'" - ) - - -@pytest.mark.parametrize( - "case", - chat_completion_test_cases.get("test_chat_multi_turn_tool_calling", {}).get("test_params", {}).get("case", []), - ids=case_id_generator, -) -def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case): - """ """ - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - messages = [] - tools = case["input"]["tools"] - expected_results = copy.deepcopy(case["expected"]) - tool_responses = copy.deepcopy(case.get("tool_responses", [])) - input_messages_turns = copy.deepcopy(case["input"]["messages"]) - - while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"): - if len(messages) == 0 or messages[-1]["role"] != "tool": - new_messages = input_messages_turns.pop(0) - if isinstance(new_messages, list): - messages.extend(new_messages) - else: - messages.append(new_messages) - - # --- API Call (Streaming) --- - stream = openai_client.chat.completions.create( - model=model, - messages=messages, - tools=tools, - stream=True, - ) - - # --- Process Stream --- - accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream) - - # --- Construct Assistant Message for History --- - assistant_message_dict = {"role": "assistant"} - if accumulated_content: - assistant_message_dict["content"] = accumulated_content - if accumulated_tool_calls: - assistant_message_dict["tool_calls"] = accumulated_tool_calls - - messages.append(assistant_message_dict) - - # --- Assertions --- - expected = expected_results.pop(0) - num_tool_calls = expected["num_tool_calls"] - - assert len(accumulated_tool_calls or []) == num_tool_calls, ( - f"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}" - ) - - if num_tool_calls > 0: - # Use the first accumulated tool call for assertion - tool_call = accumulated_tool_calls[0] - assert tool_call["function"]["name"] == expected["tool_name"], ( - f"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'" - ) - # Parse the accumulated arguments string for comparison - actual_arguments = json.loads(tool_call["function"]["arguments"]) - assert actual_arguments == expected["tool_arguments"], ( - f"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'" - ) - - # Prepare and append the tool response for the next turn - tool_response = tool_responses.pop(0) - messages.append( - { - "role": "tool", - "tool_call_id": tool_call["id"], - "content": tool_response["response"], - } - ) - else: - assert accumulated_content is not None and accumulated_content != "", "Expected content, but none received." - expected_answers = expected["answer"] - content_lower = accumulated_content.lower() - assert any(ans.lower() in content_lower for ans in expected_answers), ( - f"Expected one of {expected_answers} in content, but got: '{accumulated_content}'" - ) - - -@pytest.mark.parametrize("stream", [False, True], ids=["stream=False", "stream=True"]) -def test_chat_multi_turn_multiple_images( - request, openai_client, model, provider, verification_config, multi_image_data, stream -): - test_name_base = get_base_test_name(request) - if should_skip_test(verification_config, provider, model, test_name_base): - pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.") - - messages_turn1 = [ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": multi_image_data[0], - }, - }, - { - "type": "image_url", - "image_url": { - "url": multi_image_data[1], - }, - }, - { - "type": "text", - "text": "What furniture is in the first image that is not in the second image?", - }, - ], - }, - ] - - # First API call - response1 = openai_client.chat.completions.create( - model=model, - messages=messages_turn1, - stream=stream, - ) - if stream: - message_content1 = "" - for chunk in response1: - message_content1 += chunk.choices[0].delta.content or "" - else: - message_content1 = response1.choices[0].message.content - assert len(message_content1) > 0 - assert any(expected in message_content1.lower().strip() for expected in {"chair", "table"}), message_content1 - - # Prepare messages for the second turn - messages_turn2 = messages_turn1 + [ - {"role": "assistant", "content": message_content1}, - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": multi_image_data[2], - }, - }, - {"type": "text", "text": "What is in this image that is also in the first image?"}, - ], - }, - ] - - # Second API call - response2 = openai_client.chat.completions.create( - model=model, - messages=messages_turn2, - stream=stream, - ) - if stream: - message_content2 = "" - for chunk in response2: - message_content2 += chunk.choices[0].delta.content or "" - else: - message_content2 = response2.choices[0].message.content - assert len(message_content2) > 0 - assert any(expected in message_content2.lower().strip() for expected in {"bed"}), message_content2 - - -# --- Helper functions (structured output validation) --- - - -def get_structured_output(maybe_json_content: str, schema_name: str) -> Any | None: - if schema_name == "valid_calendar_event": - - class CalendarEvent(BaseModel): - name: str - date: str - participants: list[str] - - try: - calendar_event = CalendarEvent.model_validate_json(maybe_json_content) - return calendar_event - except Exception: - return None - elif schema_name == "valid_math_reasoning": - - class Step(BaseModel): - explanation: str - output: str - - class MathReasoning(BaseModel): - steps: list[Step] - final_answer: str - - try: - math_reasoning = MathReasoning.model_validate_json(maybe_json_content) - return math_reasoning - except Exception: - return None - - return None - - -def validate_structured_output(maybe_json_content: str, schema_name: str) -> None: - structured_output = get_structured_output(maybe_json_content, schema_name) - assert structured_output is not None - if schema_name == "valid_calendar_event": - assert structured_output.name is not None - assert structured_output.date is not None - assert len(structured_output.participants) == 2 - elif schema_name == "valid_math_reasoning": - assert len(structured_output.final_answer) > 0 - - -def _accumulate_streaming_tool_calls(stream): - """Accumulates tool calls and content from a streaming ChatCompletion response.""" - tool_calls_buffer = {} - current_id = None - full_content = "" # Initialize content accumulator - # Process streaming chunks - for chunk in stream: - choice = chunk.choices[0] - delta = choice.delta - - # Accumulate content - if delta.content: - full_content += delta.content - - if delta.tool_calls is None: - continue - - for tool_call_delta in delta.tool_calls: - if tool_call_delta.id: - current_id = tool_call_delta.id - call_id = current_id - # Skip if no ID seen yet for this tool call delta - if not call_id: - continue - func_delta = tool_call_delta.function - - if call_id not in tool_calls_buffer: - tool_calls_buffer[call_id] = { - "id": call_id, - "type": "function", # Assume function type - "function": {"name": None, "arguments": ""}, # Nested structure - } - - # Accumulate name and arguments into the nested function dict - if func_delta: - if func_delta.name: - tool_calls_buffer[call_id]["function"]["name"] = func_delta.name - if func_delta.arguments: - tool_calls_buffer[call_id]["function"]["arguments"] += func_delta.arguments - - # Return content and tool calls as a list - return full_content, list(tool_calls_buffer.values()) diff --git a/tests/verifications/test_results/fireworks.json b/tests/verifications/test_results/fireworks.json deleted file mode 100644 index ef5cf142e..000000000 --- a/tests/verifications/test_results/fireworks.json +++ /dev/null @@ -1,3751 +0,0 @@ -{ - "created": 1744918448.686489, - "duration": 254.68238854408264, - "exitcode": 1, - "root": "/home/erichuang/llama-stack", - "environment": {}, - "summary": { - "passed": 40, - "skipped": 4, - "failed": 40, - "total": 84, - "collected": 84 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", - "type": "Function", - "lineno": 554 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.13845239393413067, - "outcome": "passed" - }, - "call": { - "duration": 1.3300942620262504, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025453977286815643, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.0806605163961649, - "outcome": "passed" - }, - "call": { - "duration": 0.6202042903751135, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026358477771282196, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "earth" - }, - "setup": { - "duration": 0.07190297450870275, - "outcome": "passed" - }, - "call": { - "duration": 0.7458920907229185, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024067144840955734, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07551384158432484, - "outcome": "passed" - }, - "call": { - "duration": 0.6140249809250236, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024476367980241776, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "earth" - }, - "setup": { - "duration": 0.07434738799929619, - "outcome": "passed" - }, - "call": { - "duration": 1.6738943997770548, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000227426178753376, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07130288146436214, - "outcome": "passed" - }, - "call": { - "duration": 1.337895905598998, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028038304299116135, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.0727478675544262, - "outcome": "passed" - }, - "call": { - "duration": 0.7670011632144451, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023174844682216644, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama-v3p3-70b-instruct-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07163545861840248, - "outcome": "passed" - }, - "call": { - "duration": 0.7582714259624481, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028524454683065414, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "earth" - }, - "setup": { - "duration": 0.08122281823307276, - "outcome": "passed" - }, - "call": { - "duration": 0.6061851140111685, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002497304230928421, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama4-scout-instruct-basic-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07185561209917068, - "outcome": "passed" - }, - "call": { - "duration": 0.7516075978055596, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026526860892772675, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-earth]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "earth" - }, - "setup": { - "duration": 0.07012896798551083, - "outcome": "passed" - }, - "call": { - "duration": 1.8946502823382616, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002452842891216278, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[accounts/fireworks/models/llama4-maverick-instruct-basic-saturn]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "saturn" - }, - "setup": { - "duration": 0.06955648958683014, - "outcome": "passed" - }, - "call": { - "duration": 1.0446623722091317, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023738667368888855, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07077906839549541, - "outcome": "passed" - }, - "call": { - "duration": 0.00021365191787481308, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" - }, - "teardown": { - "duration": 0.00018982868641614914, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07118859142065048, - "outcome": "passed" - }, - "call": { - "duration": 4.20654855389148, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023640412837266922, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07351029943674803, - "outcome": "passed" - }, - "call": { - "duration": 4.875292049720883, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002571679651737213, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 157, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07474396284669638, - "outcome": "passed" - }, - "call": { - "duration": 0.0002510417252779007, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" - }, - "teardown": { - "duration": 0.00020200759172439575, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 157, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07380561903119087, - "outcome": "passed" - }, - "call": { - "duration": 2.0082657346501946, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002522030845284462, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 157, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07040839456021786, - "outcome": "passed" - }, - "call": { - "duration": 4.871666649356484, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002490682527422905, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07167178671807051, - "outcome": "passed" - }, - "call": { - "duration": 0.9903911761939526, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002704570069909096, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.07073096185922623, - "outcome": "passed" - }, - "call": { - "duration": 3.9858130905777216, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024665892124176025, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07138721086084843, - "outcome": "passed" - }, - "call": { - "duration": 1.1312237158417702, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027671270072460175, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "math" - }, - "setup": { - "duration": 0.08204951789230108, - "outcome": "passed" - }, - "call": { - "duration": 2.7500197598710656, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024303700774908066, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07405088562518358, - "outcome": "passed" - }, - "call": { - "duration": 1.238045932725072, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024984683841466904, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "math" - }, - "setup": { - "duration": 0.07009329181164503, - "outcome": "passed" - }, - "call": { - "duration": 3.55908961314708, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026627909392118454, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07596437353640795, - "outcome": "passed" - }, - "call": { - "duration": 1.0093460381031036, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002171723172068596, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama-v3p3-70b-instruct-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.06995268166065216, - "outcome": "passed" - }, - "call": { - "duration": 2.617857910692692, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024063047021627426, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "calendar" - }, - "setup": { - "duration": 0.0729895168915391, - "outcome": "passed" - }, - "call": { - "duration": 0.9500969992950559, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000257221981883049, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-scout-instruct-basic-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "math" - }, - "setup": { - "duration": 0.07070339564234018, - "outcome": "passed" - }, - "call": { - "duration": 2.6405998673290014, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002397783100605011, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-calendar]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07140882592648268, - "outcome": "passed" - }, - "call": { - "duration": 0.7515814090147614, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002773841843008995, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[accounts/fireworks/models/llama4-maverick-instruct-basic-math]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "math" - }, - "setup": { - "duration": 0.07105506956577301, - "outcome": "passed" - }, - "call": { - "duration": 3.091084435582161, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002588946372270584, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 226, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07215945608913898, - "outcome": "passed" - }, - "call": { - "duration": 1.13668860681355, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" - }, - "teardown": { - "duration": 0.0003727646544575691, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 226, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07085339725017548, - "outcome": "passed" - }, - "call": { - "duration": 6.564900263212621, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" - }, - "teardown": { - "duration": 0.00036074407398700714, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 226, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07105840742588043, - "outcome": "passed" - }, - "call": { - "duration": 1.9664474660530686, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 245, - "message": "TypeError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:245: TypeError" - }, - "teardown": { - "duration": 0.0003125220537185669, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 250, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07491886802017689, - "outcome": "passed" - }, - "call": { - "duration": 1.6239055208861828, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "assert 0 == 1\n + where 0 = len([])" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" - }, - "teardown": { - "duration": 0.0003996873274445534, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 250, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07084537390619516, - "outcome": "passed" - }, - "call": { - "duration": 7.175910825841129, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "assert 0 == 1\n + where 0 = len([])" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" - }, - "teardown": { - "duration": 0.0003013862296938896, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 250, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07152015157043934, - "outcome": "passed" - }, - "call": { - "duration": 9.749054622836411, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "assert 0 == 1\n + where 0 = len([])" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 269, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n> assert len(tool_calls_buffer) == 1\nE assert 0 == 1\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:269: AssertionError" - }, - "teardown": { - "duration": 0.0002990690991282463, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07075500208884478, - "outcome": "passed" - }, - "call": { - "duration": 0.9870151281356812, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00022785458713769913, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 278, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.0698307491838932, - "outcome": "passed" - }, - "call": { - "duration": 4.061793921515346, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 298, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 298, - "message": "TypeError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError" - }, - "teardown": { - "duration": 0.00028742197901010513, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 278, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07069965451955795, - "outcome": "passed" - }, - "call": { - "duration": 24.973835667595267, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 298, - "message": "TypeError: object of type 'NoneType' has no len()" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 298, - "message": "TypeError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert len(response.choices[0].message.tool_calls) > 0, \"Expected tool call when tool_choice='required'\"\nE TypeError: object of type 'NoneType' has no len()\n\ntests/verifications/openai_api/test_chat_completion.py:298: TypeError" - }, - "teardown": { - "duration": 0.00034868158400058746, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 302, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07031871005892754, - "outcome": "passed" - }, - "call": { - "duration": 0.7874777475371957, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027067307382822037, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 302, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07194838207215071, - "outcome": "passed" - }, - "call": { - "duration": 5.034253670834005, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 323, - "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 323, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError" - }, - "teardown": { - "duration": 0.00030618347227573395, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 302, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_required[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07107715681195259, - "outcome": "passed" - }, - "call": { - "duration": 6.841737313196063, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 323, - "message": "AssertionError: Expected tool call when tool_choice='required'\nassert 0 > 0\n + where 0 = len([])" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 323, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n \n> assert len(tool_calls_buffer) > 0, \"Expected tool call when tool_choice='required'\"\nE AssertionError: Expected tool call when tool_choice='required'\nE assert 0 > 0\nE + where 0 = len([])\n\ntests/verifications/openai_api/test_chat_completion.py:323: AssertionError" - }, - "teardown": { - "duration": 0.0003354279324412346, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.0726231737062335, - "outcome": "passed" - }, - "call": { - "duration": 0.7659661257639527, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003337552770972252, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.09297824744135141, - "outcome": "passed" - }, - "call": { - "duration": 3.257608976215124, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00022768322378396988, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.0726541867479682, - "outcome": "passed" - }, - "call": { - "duration": 4.5413802824914455, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026340410113334656, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama-v3p3-70b-instruct-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07666508108377457, - "outcome": "passed" - }, - "call": { - "duration": 0.5535151390358806, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003251638263463974, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-scout-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.09550460614264011, - "outcome": "passed" - }, - "call": { - "duration": 1.171110725030303, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002604629844427109, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[accounts/fireworks/models/llama4-maverick-instruct-basic-case0]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "case0" - }, - "setup": { - "duration": 0.07114547491073608, - "outcome": "passed" - }, - "call": { - "duration": 27.369331603869796, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023956969380378723, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07612851448357105, - "outcome": "passed" - }, - "call": { - "duration": 2.10164753254503, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(. at 0x7f1acda87ca0>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(. at 0x7f1acda87ca0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" - }, - "teardown": { - "duration": 0.00030514132231473923, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07009781803935766, - "outcome": "passed" - }, - "call": { - "duration": 2.49614445772022, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.00035297591239213943, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.0719120567664504, - "outcome": "passed" - }, - "call": { - "duration": 1.181352874264121, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": \"19.99\", \"inStock\": \"true\", \"tags\": \"[\\\\\"new\\\\\", \\\\\"sale\\\\\"]\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.000303901731967926, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07158921286463737, - "outcome": "passed" - }, - "call": { - "duration": 3.7202864307910204, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003700554370880127, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07388217654079199, - "outcome": "passed" - }, - "call": { - "duration": 0.6030126195400953, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": \"1\", \"year\": \"2025\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003188345581293106, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07314795535057783, - "outcome": "passed" - }, - "call": { - "duration": 1.0849075820297003, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nassert False\n + where False = any(. at 0x7f1acdad8970>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required). e.g. San Francisco, CA.\", \"type\": \"string\"}}}}'\nE assert False\nE + where False = any(. at 0x7f1acdad8970>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" - }, - "teardown": { - "duration": 0.00032442156225442886, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07257637288421392, - "outcome": "passed" - }, - "call": { - "duration": 1.1364115234464407, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required)\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003107702359557152, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.0716616166755557, - "outcome": "passed" - }, - "call": { - "duration": 1.6755285635590553, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": {\"type\": \"string\", \"value\": \"Widget\"}, \"description\": {\"type\": \"string\", \"value\": \"Name of the product\"}, \"price\": {\"type\": \"number\", \"value\": 19.99}, \"inStock\": {\"type\": \"boolean\", \"value\": true}, \"tags\": {\"type\": \"array\", \"value\": [\"new\", \"sale\"]}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003323536366224289, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07031949236989021, - "outcome": "passed" - }, - "call": { - "duration": 2.363899651914835, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"March 3rd\"}, \"time\": {\"time\": \"10 am\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.assistant\\n\\n{\"name\": \"get_event\", \"parameters\": {\"date\": {\"date\": \"2025-03-03\"}, \"time\": {\"time\": \"10:00\"}}}assistant\\n\\nThe function provided is not sufficient for me to answer the question.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003245687112212181, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07069017831236124, - "outcome": "passed" - }, - "call": { - "duration": 1.8757586162537336, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\"}, \"year\": {\"description\": \"Year\", \"type\": \"integer\"}}}assistant\\n\\n{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": {\"description\": \"Month of the year (1-12)\", \"type\": \"integer\", \"value\": 1}, \"year\": {\"description\": \"Year\", \"type\": \"integer\", \"value\": 2025}}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.00030215736478567123, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07024750486016273, - "outcome": "passed" - }, - "call": { - "duration": 2.9532439298927784, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \n \n Here is a potential JSON response for a function call:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \n \n However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \n \n If the actual available functions were listed, a more accurate response could be provided. \n \n For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\n \n {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nassert False\n + where False = any(. at 0x7f1acd9d54d0>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since there's no function defined to directly answer \"What's the name of the Sun in latin?\", I'll assume there's a general knowledge or information retrieval function available. Let's call it \"get_general_knowledge\". \nE \nE Here is a potential JSON response for a function call:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}} \nE \nE However, the exact function and parameter names might vary based on the actual function definitions available. If we consider the given function \"get_weather\" and its parameters, it doesn't fit the prompt. Therefore, based on a hypothetical \"get_general_knowledge\" function, the response is provided. \nE \nE If the actual available functions were listed, a more accurate response could be provided. \nE \nE For the sake of the given prompt and assuming the presence of a \"get_general_knowledge\" function, the response is:\nE \nE {\"name\": \"get_general_knowledge\", \"parameters\": {\"query\": \"Latin name of the Sun\"}}'\nE assert False\nE + where False = any(. at 0x7f1acd9d54d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" - }, - "teardown": { - "duration": 0.00038253143429756165, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07193771284073591, - "outcome": "passed" - }, - "call": { - "duration": 0.9909431086853147, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"get_weather\", \"parameters\": {\"location\": \"San Francisco, CA\"}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003658318892121315, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.0702557684853673, - "outcome": "passed" - }, - "call": { - "duration": 0.8836336443200707, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"addProduct\", \"parameters\": {\"name\": \"Widget\", \"price\": 19.99, \"inStock\": true, \"tags\": [\"new\", \"sale\"]}}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.00036840979009866714, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07019469328224659, - "outcome": "passed" - }, - "call": { - "duration": 7.394101745449007, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}} \"\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\n{\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}assistant\\n\\nimport json\\noutput = json.loads(\"{\\\\\"type\\\\\": \\\\\"function\\\\\", \\\\\"name\\\\\": \\\\\"get_event\\\\\", \\\\\"parameters\\\\\": {\\\\\"date\\\\\": \\\\\"2025-03-03\\\\\", \\\\\"time\\\\\": \\\\\"10:00\\\\\"}}\")\\nprint(output)assistant\\n\\nimport json\\n\\n# Define the input data\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\n\\n# Parse the JSON string into a Python object\\njson_data = json.loads(data)\\n\\n# Print the parsed data\\nprint(json_data)assistant\\n\\nimport json\\n\\ndef check_meeting(data):\\n # Parse the JSON string into a Python object\\n json_data = json.loads(data)\\n\\n # Check if there is a meeting on March 3rd at 10 am\\n for item in json_data:\\n if item[\\'type\\'] == \\'function\\' and item[\\'name\\'] == \\'get_event\\':\\n date = item[\\'parameters\\'][\\'date\\']\\n time = item[\\'parameters\\'][\\'time\\']\\n # Assuming you have a function to check if there is a meeting at the given date and time\\n # For simplicity, let\\'s assume the function is called \\'has_meeting\\'\\n # if has_meeting(date, time):\\n # return \\'Yes\\'\\n # else:\\n # return \\'No\\'\\n return \\'Yes\\' # For demonstration purposes, let\\'s assume there is a meeting\\n\\n return \\'No\\'\\n\\ndata = \\'[{\"type\": \"function\", \"name\": \"create_event\", \"parameters\": {\"name\": \"Meeting\", \"date\": \"2025-03-03\", \"time\": \"10:00\", \"location\": \"Conference Room\", \"participants\": [\"John\", \"Jane\"]}}, {\"type\": \"function\", \"name\": \"get_event\", \"parameters\": {\"date\": \"2025-03-03\", \"time\": \"10:00\"}}]\\'\\nprint(check_meeting(data))assistant\\n\\nYes.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.0003475993871688843, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07140176557004452, - "outcome": "passed" - }, - "call": { - "duration": 1.5649437978863716, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len((None or []))\n + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len((None or []))\nE + where None = ChatCompletionMessage(content='{\"name\": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"\" \"\" \" \"\"\"\"\"\"\"\"\"\"\"\"\" \"\" \"\"\" \"}\",\"\" \" \"}\",\"\" \" \"}\",\"\" \" \"{\" \"name\" \": \"getMonthlyExpenseSummary\", \"parameters\": {\"month\": 1, \"year\": 2024}}\"', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.00034684035927057266, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07161083538085222, - "outcome": "passed" - }, - "call": { - "duration": 0.972024847753346, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n + where False = any(. at 0x7f1acd9d4510>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE assert False\nE + where False = any(. at 0x7f1acd9d4510>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" - }, - "teardown": { - "duration": 0.0003080591559410095, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07267874106764793, - "outcome": "passed" - }, - "call": { - "duration": 0.632216920144856, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003350367769598961, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.0707720061764121, - "outcome": "passed" - }, - "call": { - "duration": 0.9429405080154538, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0002858620136976242, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.06923680566251278, - "outcome": "passed" - }, - "call": { - "duration": 0.7107308339327574, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003181472420692444, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07021687645465136, - "outcome": "passed" - }, - "call": { - "duration": 0.7717038569971919, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama-v3p3-70b-instruct'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.00030398648232221603, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07320436742156744, - "outcome": "passed" - }, - "call": { - "duration": 1.2869794629514217, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nassert False\n + where False = any(. at 0x7f1acd9b8e40>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": \"get_weather\", \"parameters\": {\"description\": \"Get the current weather\", \"parameters\": {\"type\": \"object\", \"properties\": {\"location\": {\"description\": \"The city and state (both required) (e.g. San Francisco, CA.\", \"type\": \"string\"}}}, \"required\": [\"location\"]}}'\nE assert False\nE + where False = any(. at 0x7f1acd9b8e40>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" - }, - "teardown": { - "duration": 0.0003076540306210518, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.0732570867985487, - "outcome": "passed" - }, - "call": { - "duration": 0.9204158475622535, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.000310627743601799, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07232664246112108, - "outcome": "passed" - }, - "call": { - "duration": 3.829266043379903, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.00034091807901859283, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07045515719801188, - "outcome": "passed" - }, - "call": { - "duration": 6.550140863284469, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003092316910624504, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07400601450353861, - "outcome": "passed" - }, - "call": { - "duration": 3.142588397487998, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-scout-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003124792128801346, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07049713470041752, - "outcome": "passed" - }, - "call": { - "duration": 4.074657499790192, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\n \n Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\n \n However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\n \n Given the constraint of the format and assuming a function that fits, we might construct a response like:\n \n ```json\n {\n \"name\": \"get_celestial_body_name\",\n \"parameters\": {\n \"body\": \"Sun\",\n \"language\": \"Latin\"\n }\n }\n ```\n \n This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \n \n So, the response is:\n {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nassert False\n + where False = any(. at 0x7f1acdaba030>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 550, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"]\n content_lower = accumulated_content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{accumulated_content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'Since the provided text describes a JSON schema for a function call to get the weather, and the prompt asks for the name of the Sun in Latin, we need to identify a suitable function that can provide this information. However, the given schema is for a \"get_weather\" function, which doesn't directly relate to the question about the Sun's name in Latin.\nE \nE Assuming there's another function available that can provide information about celestial bodies or their names in different languages, we might look for something like \"get_celestial_body_info\" or a similar function.\nE \nE However, based on the given format and the information provided, it seems there's an implication that we should directly provide a response in the specified JSON format for a hypothetical or related function. Let's assume a function named \"get_celestial_body_name\" that takes parameters like \"body\" and \"language\".\nE \nE Given the constraint of the format and assuming a function that fits, we might construct a response like:\nE \nE ```json\nE {\nE \"name\": \"get_celestial_body_name\",\nE \"parameters\": {\nE \"body\": \"Sun\",\nE \"language\": \"Latin\"\nE }\nE }\nE ```\nE \nE This response implies the existence of a function \"get_celestial_body_name\" that can take the name of a celestial body and a language as input and return the name of the celestial body in that language. \nE \nE So, the response is:\nE {\"name\": \"get_celestial_body_name\", \"parameters\": {\"body\": \"Sun\", \"language\": \"Latin\"}}'\nE assert False\nE + where False = any(. at 0x7f1acdaba030>)\n\ntests/verifications/openai_api/test_chat_completion.py:550: AssertionError" - }, - "teardown": { - "duration": 0.00031174439936876297, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07156828418374062, - "outcome": "passed" - }, - "call": { - "duration": 0.6585372854024172, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003233151510357857, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07135927956551313, - "outcome": "passed" - }, - "call": { - "duration": 1.0483367526903749, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.00028971116989851, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07051362749189138, - "outcome": "passed" - }, - "call": { - "duration": 4.592376064509153, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.00029074493795633316, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07347700279206038, - "outcome": "passed" - }, - "call": { - "duration": 1.5335856154561043, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 1 tool calls, but got 0\nassert 0 == 1\n + where 0 = len(([] or []))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'accounts/fireworks/models/llama4-maverick-instruct-basic'\nprovider = 'fireworks'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 1 tool calls, but got 0\nE assert 0 == 1\nE + where 0 = len(([] or []))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003180811181664467, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", - "lineno": 554, - "outcome": "skipped", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07250582799315453, - "outcome": "passed" - }, - "call": { - "duration": 0.00022417306900024414, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" - }, - "teardown": { - "duration": 0.0036543207243084908, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", - "lineno": 554, - "outcome": "skipped", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama-v3p3-70b-instruct-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07320290431380272, - "outcome": "passed" - }, - "call": { - "duration": 0.0002203313633799553, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model accounts/fireworks/models/llama-v3p3-70b-instruct on provider fireworks based on config.')" - }, - "teardown": { - "duration": 0.00035103876143693924, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=False]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07001570798456669, - "outcome": "passed" - }, - "call": { - "duration": 6.779760396108031, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023057777434587479, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-scout-instruct-basic-stream=True]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-scout-instruct-basic-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-scout-instruct-basic", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07039657514542341, - "outcome": "passed" - }, - "call": { - "duration": 4.335017805919051, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023656059056520462, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07107001543045044, - "outcome": "passed" - }, - "call": { - "duration": 5.857806807383895, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028312671929597855, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True]", - "parametrize", - "pytestmark", - "accounts/fireworks/models/llama4-maverick-instruct-basic-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "accounts/fireworks/models/llama4-maverick-instruct-basic", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07257402781397104, - "outcome": "passed" - }, - "call": { - "duration": 5.412369452416897, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0018147435039281845, - "outcome": "passed" - } - } - ], - "run_timestamp": 1744918193 -} diff --git a/tests/verifications/test_results/meta_reference.json b/tests/verifications/test_results/meta_reference.json deleted file mode 100644 index 9f9a6de82..000000000 --- a/tests/verifications/test_results/meta_reference.json +++ /dev/null @@ -1,1097 +0,0 @@ -{ - "created": 1744918847.712677, - "duration": 215.2132911682129, - "exitcode": 0, - "root": "/home/erichuang/llama-stack", - "environment": {}, - "summary": { - "passed": 28, - "total": 28, - "collected": 28 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "type": "Function", - "lineno": 554 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.09800294879823923, - "outcome": "passed" - }, - "call": { - "duration": 4.066351721994579, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025077443569898605, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07197055127471685, - "outcome": "passed" - }, - "call": { - "duration": 1.1918699434027076, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027959980070590973, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.07294174749404192, - "outcome": "passed" - }, - "call": { - "duration": 2.027987685985863, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026049185544252396, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.0741243390366435, - "outcome": "passed" - }, - "call": { - "duration": 1.2185465842485428, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002712178975343704, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07473955396562815, - "outcome": "passed" - }, - "call": { - "duration": 10.396870554424822, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025566015392541885, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 157, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07153997663408518, - "outcome": "passed" - }, - "call": { - "duration": 10.59731453191489, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002689240500330925, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07629724312573671, - "outcome": "passed" - }, - "call": { - "duration": 5.293915126472712, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002626115456223488, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.07231003511697054, - "outcome": "passed" - }, - "call": { - "duration": 19.020215207710862, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025262776762247086, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07291634101420641, - "outcome": "passed" - }, - "call": { - "duration": 6.105666604824364, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027642492204904556, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.07050449773669243, - "outcome": "passed" - }, - "call": { - "duration": 19.080777555704117, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000232757069170475, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07927203364670277, - "outcome": "passed" - }, - "call": { - "duration": 0.7760327504947782, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024862587451934814, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 250, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07514432724565268, - "outcome": "passed" - }, - "call": { - "duration": 0.7971448050811887, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002687377855181694, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07167623657733202, - "outcome": "passed" - }, - "call": { - "duration": 0.6906132427975535, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003270544111728668, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 302, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.0725558316335082, - "outcome": "passed" - }, - "call": { - "duration": 0.9245227407664061, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002602478489279747, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07299680262804031, - "outcome": "passed" - }, - "call": { - "duration": 31.90802155341953, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023696757853031158, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07331038825213909, - "outcome": "passed" - }, - "call": { - "duration": 39.341348845511675, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00022847391664981842, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.10512833576649427, - "outcome": "passed" - }, - "call": { - "duration": 2.9590865215286613, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002405792474746704, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07294358871877193, - "outcome": "passed" - }, - "call": { - "duration": 1.7672317335382104, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003217160701751709, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.11179900728166103, - "outcome": "passed" - }, - "call": { - "duration": 2.411543940193951, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023025460541248322, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07234534807503223, - "outcome": "passed" - }, - "call": { - "duration": 4.438527720049024, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028106197714805603, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.06979168020188808, - "outcome": "passed" - }, - "call": { - "duration": 3.186668715439737, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002599591389298439, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07083943020552397, - "outcome": "passed" - }, - "call": { - "duration": 2.31697681453079, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00029378384351730347, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07374998275190592, - "outcome": "passed" - }, - "call": { - "duration": 1.7863417640328407, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025129225105047226, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07009322382509708, - "outcome": "passed" - }, - "call": { - "duration": 2.248749589547515, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00022566411644220352, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.10290939453989267, - "outcome": "passed" - }, - "call": { - "duration": 4.644147016108036, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002319561317563057, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07125874608755112, - "outcome": "passed" - }, - "call": { - "duration": 3.2340452317148447, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002202410250902176, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07085523661226034, - "outcome": "passed" - }, - "call": { - "duration": 17.7453119084239, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00037308502942323685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07670701760798693, - "outcome": "passed" - }, - "call": { - "duration": 12.663874679245055, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0008251797407865524, - "outcome": "passed" - } - } - ], - "run_timestamp": 1744918631 -} diff --git a/tests/verifications/test_results/openai.json b/tests/verifications/test_results/openai.json deleted file mode 100644 index f40b8f532..000000000 --- a/tests/verifications/test_results/openai.json +++ /dev/null @@ -1,2161 +0,0 @@ -{ - "created": 1744918586.2136743, - "duration": 136.56194758415222, - "exitcode": 0, - "root": "/home/erichuang/llama-stack", - "environment": {}, - "summary": { - "passed": 56, - "total": 56, - "collected": 56 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", - "type": "Function", - "lineno": 554 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[gpt-4o-earth]", - "parametrize", - "pytestmark", - "gpt-4o-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "earth" - }, - "setup": { - "duration": 0.09683514852076769, - "outcome": "passed" - }, - "call": { - "duration": 1.2521671634167433, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002309884876012802, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[gpt-4o-saturn]", - "parametrize", - "pytestmark", - "gpt-4o-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "saturn" - }, - "setup": { - "duration": 0.08609516825526953, - "outcome": "passed" - }, - "call": { - "duration": 0.8818014115095139, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002558426931500435, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[gpt-4o-mini-earth]", - "parametrize", - "pytestmark", - "gpt-4o-mini-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "earth" - }, - "setup": { - "duration": 0.07237763796001673, - "outcome": "passed" - }, - "call": { - "duration": 0.44337860122323036, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027293339371681213, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[gpt-4o-mini-saturn]", - "parametrize", - "pytestmark", - "gpt-4o-mini-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07486020587384701, - "outcome": "passed" - }, - "call": { - "duration": 0.7754815155640244, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026193633675575256, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[gpt-4o-earth]", - "parametrize", - "pytestmark", - "gpt-4o-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "earth" - }, - "setup": { - "duration": 0.07270221784710884, - "outcome": "passed" - }, - "call": { - "duration": 0.5725504904985428, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025644712150096893, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[gpt-4o-saturn]", - "parametrize", - "pytestmark", - "gpt-4o-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07263980247080326, - "outcome": "passed" - }, - "call": { - "duration": 0.6277077253907919, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002706516534090042, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[gpt-4o-mini-earth]", - "parametrize", - "pytestmark", - "gpt-4o-mini-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "earth" - }, - "setup": { - "duration": 0.07290142774581909, - "outcome": "passed" - }, - "call": { - "duration": 0.45955433789640665, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002704532817006111, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[gpt-4o-mini-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[gpt-4o-mini-saturn]", - "parametrize", - "pytestmark", - "gpt-4o-mini-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "saturn" - }, - "setup": { - "duration": 0.0736015671864152, - "outcome": "passed" - }, - "call": { - "duration": 1.1738686058670282, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026966072618961334, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.07560365367680788, - "outcome": "passed" - }, - "call": { - "duration": 2.4073661137372255, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002443268895149231, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[gpt-4o-mini-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.06925276480615139, - "outcome": "passed" - }, - "call": { - "duration": 2.777276105247438, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002748873084783554, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-case0]", - "lineno": 157, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.07098669931292534, - "outcome": "passed" - }, - "call": { - "duration": 3.0149426590651274, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002702716737985611, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[gpt-4o-mini-case0]", - "lineno": 157, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_image[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.07316321693360806, - "outcome": "passed" - }, - "call": { - "duration": 2.401849321089685, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003180522471666336, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[gpt-4o-calendar]", - "parametrize", - "pytestmark", - "gpt-4o-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07038832642138004, - "outcome": "passed" - }, - "call": { - "duration": 1.0188098661601543, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027244072407484055, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[gpt-4o-math]", - "parametrize", - "pytestmark", - "gpt-4o-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "math" - }, - "setup": { - "duration": 0.07331131957471371, - "outcome": "passed" - }, - "call": { - "duration": 7.0907115917652845, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003256639465689659, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[gpt-4o-mini-calendar]", - "parametrize", - "pytestmark", - "gpt-4o-mini-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "calendar" - }, - "setup": { - "duration": 0.0749899847432971, - "outcome": "passed" - }, - "call": { - "duration": 0.6721736947074533, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002617714926600456, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[gpt-4o-mini-math]", - "parametrize", - "pytestmark", - "gpt-4o-mini-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "math" - }, - "setup": { - "duration": 0.07268172968178988, - "outcome": "passed" - }, - "call": { - "duration": 2.6800331017002463, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002518612891435623, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[gpt-4o-calendar]", - "parametrize", - "pytestmark", - "gpt-4o-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07150284852832556, - "outcome": "passed" - }, - "call": { - "duration": 0.6667193034663796, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025727134197950363, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[gpt-4o-math]", - "parametrize", - "pytestmark", - "gpt-4o-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "math" - }, - "setup": { - "duration": 0.07039738819003105, - "outcome": "passed" - }, - "call": { - "duration": 4.870940984226763, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025987718254327774, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[gpt-4o-mini-calendar]", - "parametrize", - "pytestmark", - "gpt-4o-mini-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07166357431560755, - "outcome": "passed" - }, - "call": { - "duration": 0.9911826532334089, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028301775455474854, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[gpt-4o-mini-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[gpt-4o-mini-math]", - "parametrize", - "pytestmark", - "gpt-4o-mini-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "math" - }, - "setup": { - "duration": 0.07489973120391369, - "outcome": "passed" - }, - "call": { - "duration": 5.81621040776372, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027776509523391724, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.0709689250215888, - "outcome": "passed" - }, - "call": { - "duration": 0.6838962603360415, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00038875360041856766, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.07440952491015196, - "outcome": "passed" - }, - "call": { - "duration": 0.6124099707230926, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00031805597245693207, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-case0]", - "lineno": 250, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_calling[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.07558728754520416, - "outcome": "passed" - }, - "call": { - "duration": 1.0413735723122954, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026555173099040985, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[gpt-4o-mini-case0]", - "lineno": 250, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_calling[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.07159029692411423, - "outcome": "passed" - }, - "call": { - "duration": 0.619917850010097, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026798900216817856, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.10359053406864405, - "outcome": "passed" - }, - "call": { - "duration": 0.6396236326545477, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000257750041782856, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.07243514712899923, - "outcome": "passed" - }, - "call": { - "duration": 0.6169720906764269, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002462640404701233, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-case0]", - "lineno": 302, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_required[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.07266584690660238, - "outcome": "passed" - }, - "call": { - "duration": 0.9391414495185018, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003280108794569969, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", - "lineno": 302, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_required[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.08437065314501524, - "outcome": "passed" - }, - "call": { - "duration": 0.6935106571763754, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027523748576641083, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.07208988349884748, - "outcome": "passed" - }, - "call": { - "duration": 0.6744982637465, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002555781975388527, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", - "lineno": 329, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.07785151246935129, - "outcome": "passed" - }, - "call": { - "duration": 0.6253539212048054, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028202030807733536, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[gpt-4o-case0]", - "parametrize", - "pytestmark", - "gpt-4o-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "case0" - }, - "setup": { - "duration": 0.0911521203815937, - "outcome": "passed" - }, - "call": { - "duration": 0.7869452070444822, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00043197907507419586, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", - "lineno": 352, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_none[gpt-4o-mini-case0]", - "parametrize", - "pytestmark", - "gpt-4o-mini-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "case0" - }, - "setup": { - "duration": 0.10472878441214561, - "outcome": "passed" - }, - "call": { - "duration": 0.6786438375711441, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025699567049741745, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "parametrize", - "pytestmark", - "gpt-4o-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07002853509038687, - "outcome": "passed" - }, - "call": { - "duration": 2.395758199505508, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002955012023448944, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "parametrize", - "pytestmark", - "gpt-4o-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07316868472844362, - "outcome": "passed" - }, - "call": { - "duration": 1.3224441464990377, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002612341195344925, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "parametrize", - "pytestmark", - "gpt-4o-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.10713072493672371, - "outcome": "passed" - }, - "call": { - "duration": 1.0061814906075597, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002610785886645317, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "gpt-4o-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07267123833298683, - "outcome": "passed" - }, - "call": { - "duration": 4.26907461322844, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025866832584142685, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "gpt-4o-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07208938524127007, - "outcome": "passed" - }, - "call": { - "duration": 2.8186135441064835, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026924535632133484, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07148494757711887, - "outcome": "passed" - }, - "call": { - "duration": 2.1276168935000896, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024427566677331924, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "parametrize", - "pytestmark", - "gpt-4o-mini-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07107946090400219, - "outcome": "passed" - }, - "call": { - "duration": 1.1634307894855738, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00030216481536626816, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07261826191097498, - "outcome": "passed" - }, - "call": { - "duration": 1.4525672728195786, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002602897584438324, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.0710728308185935, - "outcome": "passed" - }, - "call": { - "duration": 4.533652591519058, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002704774960875511, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.0781267425045371, - "outcome": "passed" - }, - "call": { - "duration": 2.160066588781774, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002731531858444214, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-text_then_weather_tool]", - "parametrize", - "pytestmark", - "gpt-4o-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07118126843124628, - "outcome": "passed" - }, - "call": { - "duration": 2.068133544176817, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002514524385333061, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-weather_tool_then_text]", - "parametrize", - "pytestmark", - "gpt-4o-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07241942081600428, - "outcome": "passed" - }, - "call": { - "duration": 1.1098179938271642, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028003379702568054, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-add_product_tool]", - "parametrize", - "pytestmark", - "gpt-4o-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07439264003187418, - "outcome": "passed" - }, - "call": { - "duration": 1.0720843756571412, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026407837867736816, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "gpt-4o-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07028928305953741, - "outcome": "passed" - }, - "call": { - "duration": 5.23135226033628, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002559954300522804, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "gpt-4o-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.0733694015070796, - "outcome": "passed" - }, - "call": { - "duration": 2.3011497305706143, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002724975347518921, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-text_then_weather_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07319487817585468, - "outcome": "passed" - }, - "call": { - "duration": 2.060736038722098, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002620834857225418, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-weather_tool_then_text]", - "parametrize", - "pytestmark", - "gpt-4o-mini-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07086801622062922, - "outcome": "passed" - }, - "call": { - "duration": 1.1969546489417553, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023349467664957047, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-add_product_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07276885025203228, - "outcome": "passed" - }, - "call": { - "duration": 2.2494191862642765, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002493094652891159, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07039583195000887, - "outcome": "passed" - }, - "call": { - "duration": 4.528189226053655, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025649741291999817, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[gpt-4o-mini-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "gpt-4o-mini-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07187813706696033, - "outcome": "passed" - }, - "call": { - "duration": 2.446169280447066, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024812109768390656, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[gpt-4o-stream=False]", - "parametrize", - "pytestmark", - "gpt-4o-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07299137767404318, - "outcome": "passed" - }, - "call": { - "duration": 8.35237762145698, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026817526668310165, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[gpt-4o-stream=True]", - "parametrize", - "pytestmark", - "gpt-4o-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07363969460129738, - "outcome": "passed" - }, - "call": { - "duration": 4.653971025720239, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026602670550346375, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=False]", - "parametrize", - "pytestmark", - "gpt-4o-mini-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07377734407782555, - "outcome": "passed" - }, - "call": { - "duration": 9.776036521419883, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000254971906542778, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[gpt-4o-mini-stream=True]", - "parametrize", - "pytestmark", - "gpt-4o-mini-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "gpt-4o-mini", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07054048776626587, - "outcome": "passed" - }, - "call": { - "duration": 12.58133109845221, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0013354746624827385, - "outcome": "passed" - } - } - ], - "run_timestamp": 1744918448 -} diff --git a/tests/verifications/test_results/together.json b/tests/verifications/test_results/together.json deleted file mode 100644 index 2d74b8cca..000000000 --- a/tests/verifications/test_results/together.json +++ /dev/null @@ -1,3821 +0,0 @@ -{ - "created": 1744918192.9299376, - "duration": 126.91354608535767, - "exitcode": 1, - "root": "/home/erichuang/llama-stack", - "environment": {}, - "summary": { - "passed": 40, - "failed": 40, - "skipped": 4, - "total": 84, - "collected": 84 - }, - "collectors": [ - { - "nodeid": "", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "type": "Module" - } - ] - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py", - "outcome": "passed", - "result": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "type": "Function", - "lineno": 95 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "type": "Function", - "lineno": 114 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 138 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 157 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "type": "Function", - "lineno": 181 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "type": "Function", - "lineno": 204 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 226 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 250 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 278 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 302 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 329 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "type": "Function", - "lineno": 352 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 380 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "type": "Function", - "lineno": 471 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", - "type": "Function", - "lineno": 554 - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", - "type": "Function", - "lineno": 554 - } - ] - } - ], - "tests": [ - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "earth" - }, - "setup": { - "duration": 0.11939296405762434, - "outcome": "passed" - }, - "call": { - "duration": 0.6422080835327506, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002934802323579788, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07340026367455721, - "outcome": "passed" - }, - "call": { - "duration": 0.6134521719068289, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00031049735844135284, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.07351398840546608, - "outcome": "passed" - }, - "call": { - "duration": 0.898847377859056, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002735760062932968, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.08612977154552937, - "outcome": "passed" - }, - "call": { - "duration": 0.6511319326236844, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003559151664376259, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "earth" - }, - "setup": { - "duration": 0.08106738794595003, - "outcome": "passed" - }, - "call": { - "duration": 1.206272155046463, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003584325313568115, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 95, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "saturn" - }, - "setup": { - "duration": 0.0796442786231637, - "outcome": "passed" - }, - "call": { - "duration": 0.4815350500866771, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025806669145822525, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "earth" - }, - "setup": { - "duration": 0.07231954019516706, - "outcome": "passed" - }, - "call": { - "duration": 1.1521263290196657, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00032721273601055145, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "lineno": 114, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07364387530833483, - "outcome": "passed" - }, - "call": { - "duration": 1.0600289879366755, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00028987880796194077, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "lineno": 114, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "earth" - }, - "setup": { - "duration": 0.07162868417799473, - "outcome": "passed" - }, - "call": { - "duration": 0.2930005770176649, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" - }, - "teardown": { - "duration": 0.0004123607650399208, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "lineno": 114, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07553945016115904, - "outcome": "passed" - }, - "call": { - "duration": 0.4265708066523075, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" - }, - "teardown": { - "duration": 0.0003767991438508034, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "lineno": 114, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "earth" - }, - "setup": { - "duration": 0.07143466174602509, - "outcome": "passed" - }, - "call": { - "duration": 1.0281891459599137, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" - }, - "teardown": { - "duration": 0.0003773234784603119, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "lineno": 114, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "saturn" - }, - "setup": { - "duration": 0.07092289440333843, - "outcome": "passed" - }, - "call": { - "duration": 0.4124102909117937, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 132, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError" - }, - "teardown": { - "duration": 0.0003204820677638054, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 138, - "outcome": "skipped", - "keywords": [ - "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.07159135863184929, - "outcome": "passed" - }, - "call": { - "duration": 0.0002104705199599266, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" - }, - "teardown": { - "duration": 0.0003354400396347046, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.0744061404839158, - "outcome": "passed" - }, - "call": { - "duration": 2.2864254424348474, - "outcome": "passed" - }, - "teardown": { - "duration": 0.000246487557888031, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 138, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07066962588578463, - "outcome": "passed" - }, - "call": { - "duration": 4.47614302393049, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00034836214035749435, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 157, - "outcome": "skipped", - "keywords": [ - "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.09739464800804853, - "outcome": "passed" - }, - "call": { - "duration": 0.0003191335126757622, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" - }, - "teardown": { - "duration": 0.00026350561529397964, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 157, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.10561292432248592, - "outcome": "passed" - }, - "call": { - "duration": 2.6175378002226353, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 175, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 175, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" - }, - "teardown": { - "duration": 0.0003682933747768402, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 157, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07195662055164576, - "outcome": "passed" - }, - "call": { - "duration": 3.2985631534829736, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 175, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 175, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n stream=True,\n )\n content = \"\"\n for chunk in response:\n> content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError" - }, - "teardown": { - "duration": 0.0003777453675866127, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "calendar" - }, - "setup": { - "duration": 0.0733196372166276, - "outcome": "passed" - }, - "call": { - "duration": 0.40959454514086246, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00029125437140464783, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "math" - }, - "setup": { - "duration": 0.07248916011303663, - "outcome": "passed" - }, - "call": { - "duration": 3.498455540277064, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023921672254800797, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07911352813243866, - "outcome": "passed" - }, - "call": { - "duration": 0.6717434097081423, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00025916099548339844, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.07156322989612818, - "outcome": "passed" - }, - "call": { - "duration": 3.698870756663382, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002654632553458214, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07457748707383871, - "outcome": "passed" - }, - "call": { - "duration": 0.8891718471422791, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002395138144493103, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 181, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "math" - }, - "setup": { - "duration": 0.07155069429427385, - "outcome": "passed" - }, - "call": { - "duration": 3.276700599119067, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002568913623690605, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07365360390394926, - "outcome": "passed" - }, - "call": { - "duration": 0.7638470390811563, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027653202414512634, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "lineno": 204, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "math" - }, - "setup": { - "duration": 0.07424602191895247, - "outcome": "passed" - }, - "call": { - "duration": 3.622116087935865, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002861013635993004, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "lineno": 204, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07192372716963291, - "outcome": "passed" - }, - "call": { - "duration": 0.5049019353464246, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" - }, - "teardown": { - "duration": 0.00036794692277908325, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "lineno": 204, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "math" - }, - "setup": { - "duration": 0.07304532174021006, - "outcome": "passed" - }, - "call": { - "duration": 2.961389934644103, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" - }, - "teardown": { - "duration": 0.0003312695771455765, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "lineno": 204, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "calendar" - }, - "setup": { - "duration": 0.07350922282785177, - "outcome": "passed" - }, - "call": { - "duration": 0.6764275450259447, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" - }, - "teardown": { - "duration": 0.0003826189786195755, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "lineno": 204, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "math" - }, - "setup": { - "duration": 0.07295230869203806, - "outcome": "passed" - }, - "call": { - "duration": 10.689278944395483, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 223, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n response_format=case[\"input\"][\"response_format\"],\n stream=True,\n )\n maybe_json_content = \"\"\n for chunk in response:\n> maybe_json_content += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError" - }, - "teardown": { - "duration": 0.0004014279693365097, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.09202722646296024, - "outcome": "passed" - }, - "call": { - "duration": 0.8140280386433005, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003595082089304924, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.09484888892620802, - "outcome": "passed" - }, - "call": { - "duration": 0.3706049248576164, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003290809690952301, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 226, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.10521113499999046, - "outcome": "passed" - }, - "call": { - "duration": 0.36842701490968466, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00031410157680511475, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 250, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.10422383341938257, - "outcome": "passed" - }, - "call": { - "duration": 0.6454980997368693, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002997415140271187, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 250, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.09408890828490257, - "outcome": "passed" - }, - "call": { - "duration": 0.36066764686256647, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 268, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00035039614886045456, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 250, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07232134602963924, - "outcome": "passed" - }, - "call": { - "duration": 0.4706049496307969, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 268, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00039384420961141586, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.07465469185262918, - "outcome": "passed" - }, - "call": { - "duration": 0.4374591317027807, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0003099888563156128, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07351493183523417, - "outcome": "passed" - }, - "call": { - "duration": 0.4368853671476245, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026369933038949966, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 278, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07258845027536154, - "outcome": "passed" - }, - "call": { - "duration": 0.940508272498846, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00032961275428533554, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 302, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.07273276895284653, - "outcome": "passed" - }, - "call": { - "duration": 0.6150273764505982, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002876110374927521, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 302, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07505382597446442, - "outcome": "passed" - }, - "call": { - "duration": 0.5026597818359733, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 321, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0003487151116132736, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 302, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07343385275453329, - "outcome": "passed" - }, - "call": { - "duration": 0.720921658910811, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 321, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"required\", # Force tool call\n stream=True,\n )\n \n> _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0004109758883714676, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 329, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.07189673464745283, - "outcome": "passed" - }, - "call": { - "duration": 0.403152690269053, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_xx4eg2o4wladhs7i0gy8d2cb', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=4867562177231181000).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" - }, - "teardown": { - "duration": 0.00037758704274892807, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 329, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07282305508852005, - "outcome": "passed" - }, - "call": { - "duration": 0.4538485202938318, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_6gehr7flf4gaqu65prmi1pca', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" - }, - "teardown": { - "duration": 0.0003799665719270706, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 329, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07050042506307364, - "outcome": "passed" - }, - "call": { - "duration": 0.3740060832351446, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n + where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 349, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n response = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=False,\n )\n \n assert response.choices[0].message.role == \"assistant\"\n> assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE AssertionError: Expected no tool calls when tool_choice='none'\nE assert [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE + where [ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE + where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ngwnt1xmgxipkswdhdepisni', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError" - }, - "teardown": { - "duration": 0.0003066370263695717, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "lineno": 352, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "case0" - }, - "setup": { - "duration": 0.06983672920614481, - "outcome": "passed" - }, - "call": { - "duration": 0.6774894064292312, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_emdpbpvm77rqbzz66arrzv5w', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" - }, - "teardown": { - "duration": 0.0003580348566174507, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "lineno": 352, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "case0" - }, - "setup": { - "duration": 0.07331710867583752, - "outcome": "passed" - }, - "call": { - "duration": 0.38044120091944933, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_g85q6ysacljgjczgq8r30tjv', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" - }, - "teardown": { - "duration": 0.0003765234723687172, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "lineno": 352, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "case0" - }, - "setup": { - "duration": 0.07194581907242537, - "outcome": "passed" - }, - "call": { - "duration": 0.37374384608119726, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n + where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 376, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"], # Reusing existing case for now\n ids=case_id_generator,\n )\n def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n stream = openai_client.chat.completions.create(\n model=model,\n messages=case[\"input\"][\"messages\"],\n tools=case[\"input\"][\"tools\"],\n tool_choice=\"none\",\n stream=True,\n )\n \n content = \"\"\n for chunk in stream:\n delta = chunk.choices[0].delta\n if delta.content:\n content += delta.content\n> assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE AssertionError: Expected no tool call chunks when tool_choice='none'\nE assert not [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE + where [ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_zq6x10vfu9pkxme6pm9zxouk', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError" - }, - "teardown": { - "duration": 0.0003813542425632477, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07330320309847593, - "outcome": "passed" - }, - "call": { - "duration": 0.4314677305519581, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n + where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 439, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n> assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE + where [ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_l05cckdk5mooai2iyfucg4s8', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError" - }, - "teardown": { - "duration": 0.00040314625948667526, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07405277714133263, - "outcome": "passed" - }, - "call": { - "duration": 0.8350177155807614, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00023361947387456894, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07361320778727531, - "outcome": "passed" - }, - "call": { - "duration": 1.0619212854653597, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002395985648036003, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07290417980402708, - "outcome": "passed" - }, - "call": { - "duration": 4.241749887354672, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00027841050177812576, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07301546633243561, - "outcome": "passed" - }, - "call": { - "duration": 2.0520667918026447, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002469858154654503, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07405530381947756, - "outcome": "passed" - }, - "call": { - "duration": 0.48041669093072414, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nassert False\n + where False = any(. at 0x7f4274057610>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: 'I am not able to complete this task as it falls outside of the scope of the functions I have been given.'\nE assert False\nE + where False = any(. at 0x7f4274057610>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" - }, - "teardown": { - "duration": 0.00035319291055202484, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.0724497502669692, - "outcome": "passed" - }, - "call": { - "duration": 0.832760401070118, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026283878833055496, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07180811651051044, - "outcome": "passed" - }, - "call": { - "duration": 1.4359142612665892, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002761436626315117, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07503274269402027, - "outcome": "passed" - }, - "call": { - "duration": 1.909641013480723, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002613905817270279, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07153380755335093, - "outcome": "passed" - }, - "call": { - "duration": 2.695867782458663, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00032124295830726624, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07275318540632725, - "outcome": "passed" - }, - "call": { - "duration": 0.34551760647445917, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n + where False = any(. at 0x7f42742dd4d0>)" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 467, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call.id,\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n assert assistant_message.content is not None, \"Expected content, but none received.\"\n expected_answers = expected[\"answer\"] # This is now a list\n content_lower = assistant_message.content.lower()\n> assert any(ans.lower() in content_lower for ans in expected_answers), (\n f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n )\nE AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE assert False\nE + where False = any(. at 0x7f42742dd4d0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError" - }, - "teardown": { - "duration": 0.0003842068836092949, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07281951513141394, - "outcome": "passed" - }, - "call": { - "duration": 1.008104412816465, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00026233773678541183, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07155719958245754, - "outcome": "passed" - }, - "call": { - "duration": 2.3485742239281535, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002629430964589119, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "lineno": 380, - "outcome": "failed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07251190021634102, - "outcome": "passed" - }, - "call": { - "duration": 2.9882029946893454, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 450, - "message": "AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': '\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nassert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\n \n Omitting 1 identical items, use -vv to show\n Differing items:\n {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\n {'name': '\"Team Building\"'} != {'name': 'Team Building'}\n {'time': '\"10:00\"'} != {'time': '10:00'}\n {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\n \n ...Full output truncated (21 lines hidden), use '-vv' to show" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 450, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\"\n Test cases for multi-turn tool calling.\n Tool calls are asserted.\n Tool responses are provided in the test case.\n Final response is asserted.\n \"\"\"\n \n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n # Create a copy of the messages list to avoid modifying the original\n messages = []\n tools = case[\"input\"][\"tools\"]\n # Use deepcopy to prevent modification across runs/parametrization\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n # keep going until either\n # 1. we have messages to test in multi-turn\n # 2. no messages but last message is tool response\n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n # do not take new messages if last message is tool response\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n # Ensure new_messages is a list of message objects\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n # If it's a single message object, add it directly\n messages.append(new_messages)\n \n # --- API Call ---\n response = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=False,\n )\n \n # --- Process Response ---\n assistant_message = response.choices[0].message\n messages.append(assistant_message.model_dump(exclude_unset=True))\n \n assert assistant_message.role == \"assistant\"\n \n # Get the expected result data\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n # --- Assertions based on expected result ---\n assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n tool_call = assistant_message.tool_calls[0]\n assert tool_call.function.name == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n )\n # Parse the JSON string arguments before comparing\n actual_arguments = json.loads(tool_call.function.arguments)\n> assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\nE AssertionError: Expected arguments '{'name': 'Team Building', 'date': '2025-03-03', 'time': '10:00', 'location': 'Main Conference Room', 'participants': ['Alice', 'Bob', 'Charlie']}', got '{'date': '\"2025-03-03\"', 'location': '\"Main Conference Room\"', 'name': '\"Team Building\"', 'participants': ['Alice', 'Bob', 'Charlie'], 'time': '\"10:00\"'}'\nE assert {'date': '\"20...harlie'], ...} == {'date': '202...harlie'], ...}\nE \nE Omitting 1 identical items, use -vv to show\nE Differing items:\nE {'date': '\"2025-03-03\"'} != {'date': '2025-03-03'}\nE {'name': '\"Team Building\"'} != {'name': 'Team Building'}\nE {'time': '\"10:00\"'} != {'time': '10:00'}\nE {'location': '\"Main Conference Room\"'} != {'location': 'Main Conference Room'}...\nE \nE ...Full output truncated (21 lines hidden), use '-vv' to show\n\ntests/verifications/openai_api/test_chat_completion.py:450: AssertionError" - }, - "teardown": { - "duration": 0.0003328891471028328, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "lineno": 380, - "outcome": "passed", - "keywords": [ - "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07363704219460487, - "outcome": "passed" - }, - "call": { - "duration": 4.031332626007497, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002817586064338684, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07673048228025436, - "outcome": "passed" - }, - "call": { - "duration": 0.3994998000562191, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 521, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n> assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\nE AssertionError: Expected 0 tool calls, but got 1\nE assert 1 == 0\nE + where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_dqcu28a6iyxlobv36c23k0qp', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError" - }, - "teardown": { - "duration": 0.0003687366843223572, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07477510999888182, - "outcome": "passed" - }, - "call": { - "duration": 0.918418399989605, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" - }, - "teardown": { - "duration": 0.00036141276359558105, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "lineno": 471, - "outcome": "passed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07217607088387012, - "outcome": "passed" - }, - "call": { - "duration": 1.2676455974578857, - "outcome": "passed" - }, - "teardown": { - "duration": 0.00024215038865804672, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.0713065592572093, - "outcome": "passed" - }, - "call": { - "duration": 1.0453352769836783, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" - }, - "teardown": { - "duration": 0.00030668359249830246, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07108221855014563, - "outcome": "passed" - }, - "call": { - "duration": 1.034472893923521, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 547, - "message": "AssertionError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n \n # --- Construct Assistant Message for History ---\n assistant_message_dict = {\"role\": \"assistant\"}\n if accumulated_content:\n assistant_message_dict[\"content\"] = accumulated_content\n if accumulated_tool_calls:\n assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n \n messages.append(assistant_message_dict)\n \n # --- Assertions ---\n expected = expected_results.pop(0)\n num_tool_calls = expected[\"num_tool_calls\"]\n \n assert len(accumulated_tool_calls or []) == num_tool_calls, (\n f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n )\n \n if num_tool_calls > 0:\n # Use the first accumulated tool call for assertion\n tool_call = accumulated_tool_calls[0]\n assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n )\n # Parse the accumulated arguments string for comparison\n actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n assert actual_arguments == expected[\"tool_arguments\"], (\n f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n )\n \n # Prepare and append the tool response for the next turn\n tool_response = tool_responses.pop(0)\n messages.append(\n {\n \"role\": \"tool\",\n \"tool_call_id\": tool_call[\"id\"],\n \"content\": tool_response[\"response\"],\n }\n )\n else:\n> assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE AssertionError: Expected content, but none received.\nE assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError" - }, - "teardown": { - "duration": 0.00035398639738559723, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07186305243521929, - "outcome": "passed" - }, - "call": { - "duration": 1.8766405330970883, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0003088880330324173, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.0846314700320363, - "outcome": "passed" - }, - "call": { - "duration": 0.40889575984328985, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0003652172163128853, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07273881137371063, - "outcome": "passed" - }, - "call": { - "duration": 2.251293654553592, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00030664633959531784, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.071181770414114, - "outcome": "passed" - }, - "call": { - "duration": 0.5708655547350645, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00036500580608844757, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.06934114638715982, - "outcome": "passed" - }, - "call": { - "duration": 0.5055103581398726, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00035354867577552795, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "text_then_weather_tool" - }, - "setup": { - "duration": 0.07129869516938925, - "outcome": "passed" - }, - "call": { - "duration": 1.5799349313601851, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00033699069172143936, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "weather_tool_then_text" - }, - "setup": { - "duration": 0.07074506860226393, - "outcome": "passed" - }, - "call": { - "duration": 0.5245106862857938, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00042015407234430313, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "add_product_tool" - }, - "setup": { - "duration": 0.07020766660571098, - "outcome": "passed" - }, - "call": { - "duration": 0.6389470677822828, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.00035757478326559067, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "get_then_create_event_tool" - }, - "setup": { - "duration": 0.07121358439326286, - "outcome": "passed" - }, - "call": { - "duration": 0.5222592242062092, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0003436664119362831, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "lineno": 471, - "outcome": "failed", - "keywords": [ - "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "compare_monthly_expense_tool" - }, - "setup": { - "duration": 0.07017400953918695, - "outcome": "passed" - }, - "call": { - "duration": 1.7245550760999322, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 506, - "message": "" - }, - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 688, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n @pytest.mark.parametrize(\n \"case\",\n chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n ids=case_id_generator,\n )\n def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n \"\"\" \"\"\"\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages = []\n tools = case[\"input\"][\"tools\"]\n expected_results = copy.deepcopy(case[\"expected\"])\n tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n \n while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n new_messages = input_messages_turns.pop(0)\n if isinstance(new_messages, list):\n messages.extend(new_messages)\n else:\n messages.append(new_messages)\n \n # --- API Call (Streaming) ---\n stream = openai_client.chat.completions.create(\n model=model,\n messages=messages,\n tools=tools,\n stream=True,\n )\n \n # --- Process Stream ---\n> accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = \n\n def _accumulate_streaming_tool_calls(stream):\n \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n tool_calls_buffer = {}\n current_id = None\n full_content = \"\" # Initialize content accumulator\n # Process streaming chunks\n for chunk in stream:\n> choice = chunk.choices[0]\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError" - }, - "teardown": { - "duration": 0.0003162780776619911, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", - "lineno": 554, - "outcome": "skipped", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07253758516162634, - "outcome": "passed" - }, - "call": { - "duration": 0.00021537486463785172, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" - }, - "teardown": { - "duration": 0.0004162406548857689, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", - "lineno": 554, - "outcome": "skipped", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]", - "parametrize", - "pytestmark", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.07268107868731022, - "outcome": "passed" - }, - "call": { - "duration": 0.0002132616937160492, - "outcome": "skipped", - "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')" - }, - "teardown": { - "duration": 0.00021094270050525665, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.07398672867566347, - "outcome": "passed" - }, - "call": { - "duration": 4.383559702895582, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002781357616186142, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "lineno": 554, - "outcome": "failed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.08006586041301489, - "outcome": "passed" - }, - "call": { - "duration": 2.16784877050668, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 596, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 596, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" - }, - "teardown": { - "duration": 0.0003619194030761719, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", - "lineno": 554, - "outcome": "passed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "stream=False" - }, - "setup": { - "duration": 0.0709412069991231, - "outcome": "passed" - }, - "call": { - "duration": 6.110534753650427, - "outcome": "passed" - }, - "teardown": { - "duration": 0.0002450142055749893, - "outcome": "passed" - } - }, - { - "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", - "lineno": 554, - "outcome": "failed", - "keywords": [ - "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]", - "parametrize", - "pytestmark", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True", - "test_chat_completion.py", - "openai_api", - "verifications", - "tests", - "llama-stack", - "" - ], - "metadata": { - "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", - "case_id": "stream=True" - }, - "setup": { - "duration": 0.0725309094414115, - "outcome": "passed" - }, - "call": { - "duration": 2.291131243109703, - "outcome": "failed", - "crash": { - "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py", - "lineno": 596, - "message": "IndexError: list index out of range" - }, - "traceback": [ - { - "path": "tests/verifications/openai_api/test_chat_completion.py", - "lineno": 596, - "message": "IndexError" - } - ], - "longrepr": "request = >\nopenai_client = \nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n def test_chat_multi_turn_multiple_images(\n request, openai_client, model, provider, verification_config, multi_image_data, stream\n ):\n test_name_base = get_base_test_name(request)\n if should_skip_test(verification_config, provider, model, test_name_base):\n pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n \n messages_turn1 = [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[0],\n },\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": multi_image_data[1],\n },\n },\n {\n \"type\": \"text\",\n \"text\": \"What furniture is in the first image that is not in the second image?\",\n },\n ],\n },\n ]\n \n # First API call\n response1 = openai_client.chat.completions.create(\n model=model,\n messages=messages_turn1,\n stream=stream,\n )\n if stream:\n message_content1 = \"\"\n for chunk in response1:\n> message_content1 += chunk.choices[0].delta.content or \"\"\nE IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError" - }, - "teardown": { - "duration": 0.0018906639888882637, - "outcome": "passed" - } - } - ], - "run_timestamp": 1744918065 -}