{
  "created": 1744915672.332456,
  "duration": 157.25543904304504,
  "exitcode": 1,
  "root": "/home/erichuang/llama-stack",
  "environment": {},
  "summary": {
    "passed": 41,
    "failed": 39,
    "skipped": 4,
    "total": 84,
    "collected": 84
  },
  "collectors": [
    {
      "nodeid": "",
      "outcome": "passed",
      "result": [
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py",
          "type": "Module"
        }
      ]
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py",
      "outcome": "passed",
      "result": [
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
          "type": "Function",
          "lineno": 95
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
          "type": "Function",
          "lineno": 114
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 138
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 138
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 138
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 157
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 157
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 157
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
          "type": "Function",
          "lineno": 181
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
          "type": "Function",
          "lineno": 204
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 226
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 226
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 226
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 250
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 250
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 250
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 278
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 278
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 278
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 302
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 302
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 302
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 329
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 329
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 329
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
          "type": "Function",
          "lineno": 352
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
          "type": "Function",
          "lineno": 352
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
          "type": "Function",
          "lineno": 352
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 380
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
          "type": "Function",
          "lineno": 471
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
          "type": "Function",
          "lineno": 554
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
          "type": "Function",
          "lineno": 554
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
          "type": "Function",
          "lineno": 554
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
          "type": "Function",
          "lineno": 554
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
          "type": "Function",
          "lineno": 554
        },
        {
          "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
          "type": "Function",
          "lineno": 554
        }
      ]
    }
  ],
  "tests": [
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.11770237609744072,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.6406435770913959,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002960069105029106,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07460446748882532,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5787241570651531,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00026445742696523666,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.07483412884175777,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.8699872978031635,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002666134387254715,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07169668562710285,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5061587328091264,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00028620287775993347,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.0829654112458229,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.2450250089168549,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00024125166237354279,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
      "lineno": 95,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07169047556817532,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7659840155392885,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023942161351442337,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
      "lineno": 114,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.0718865105882287,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.9115259740501642,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002334779128432274,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
      "lineno": 114,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07380938995629549,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.9997824784368277,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00029965396970510483,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
      "lineno": 114,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.07564573176205158,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7452597729861736,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 132,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 132,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-earth]>>\nopenai_client = <openai.OpenAI object at 0x7f31280cf760>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
      },
      "teardown": {
        "duration": 0.0003521796315908432,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
      "lineno": 114,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07307655736804008,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.45107892248779535,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 132,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 132,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Scout-17B-16E-Instruct-saturn]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f38040>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
      },
      "teardown": {
        "duration": 0.00031046755611896515,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
      "lineno": 114,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "earth"
      },
      "setup": {
        "duration": 0.07041068747639656,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.1565949888899922,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 132,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 132,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-earth]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f64370>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'earth', 'input': {'messages': [{'content': 'Which planet do humans live on?', 'role': 'user'}]}, 'output': 'Earth'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
      },
      "teardown": {
        "duration": 0.0002977624535560608,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
      "lineno": 114,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "saturn"
      },
      "setup": {
        "duration": 0.07026446517556906,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7347098160535097,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 132,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 132,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_basic[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-saturn]>>\nopenai_client = <openai.OpenAI object at 0x7f31280cf3a0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'saturn', 'input': {'messages': [{'content': 'Which planet has rings around it with a name starting with letter S?', 'role': 'user'}]}, 'output': 'Saturn'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_basic\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_basic(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:132: IndexError"
      },
      "teardown": {
        "duration": 0.000298389233648777,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 138,
      "outcome": "skipped",
      "keywords": [
        "test_chat_non_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07295764330774546,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.0002657398581504822,
        "outcome": "skipped",
        "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 147, 'Skipped: Skipping test_chat_non_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
      },
      "teardown": {
        "duration": 0.00035269279032945633,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 138,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07182978931814432,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.746746251359582,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00026807747781276703,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 138,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07101599592715502,
        "outcome": "passed"
      },
      "call": {
        "duration": 5.472218153998256,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00029551703482866287,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 157,
      "outcome": "skipped",
      "keywords": [
        "test_chat_streaming_image[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07391759566962719,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.00026184599846601486,
        "outcome": "skipped",
        "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 166, 'Skipped: Skipping test_chat_streaming_image for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
      },
      "teardown": {
        "duration": 0.0002144472673535347,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 157,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.08702181186527014,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.685878618620336,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 175,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 175,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e67430>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError"
      },
      "teardown": {
        "duration": 0.00031331367790699005,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 157,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07287734653800726,
        "outcome": "passed"
      },
      "call": {
        "duration": 3.985588035546243,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 175,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 175,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_image[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e42980>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': [{'text': 'What is in this image?', 'type': 'text'}, {'image_url': {...}, 'type': 'image_url'}], 'role': 'user'}]}, 'output': 'llama'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_image\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_image(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            stream=True,\n        )\n        content = \"\"\n        for chunk in response:\n>           content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:175: IndexError"
      },
      "teardown": {
        "duration": 0.0002881418913602829,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.07055713329464197,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7881239131093025,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00024167727679014206,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.07159801851958036,
        "outcome": "passed"
      },
      "call": {
        "duration": 4.972125994041562,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002335989847779274,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.07872365694493055,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5325954724103212,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002604750916361809,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.07101414445787668,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.5978550128638744,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00032774079591035843,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.0737495282664895,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7547545190900564,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00024818163365125656,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
      "lineno": 181,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.07616753969341516,
        "outcome": "passed"
      },
      "call": {
        "duration": 4.4260268323123455,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023849774152040482,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
      "lineno": 204,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.07280991226434708,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.2391796316951513,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00022371485829353333,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
      "lineno": 204,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-3.3-70B-Instruct-Turbo-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.07361716963350773,
        "outcome": "passed"
      },
      "call": {
        "duration": 6.5637129517272115,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00024466682225465775,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
      "lineno": 204,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.07750869635492563,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.6057027634233236,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 223,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 223,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-calendar]>>\nopenai_client = <openai.OpenAI object at 0x7f3127eb4c70>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
      },
      "teardown": {
        "duration": 0.00031183846294879913,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
      "lineno": 204,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.0722011923789978,
        "outcome": "passed"
      },
      "call": {
        "duration": 3.635944495908916,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 223,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 223,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Scout-17B-16E-Instruct-math]>>\nopenai_client = <openai.OpenAI object at 0x7f3127eb4370>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
      },
      "teardown": {
        "duration": 0.00029693637043237686,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
      "lineno": 204,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "calendar"
      },
      "setup": {
        "duration": 0.07182575855404139,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.583567344583571,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 223,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 223,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-calendar]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e37f70>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'calendar', 'input': {'messages': [{'content': 'Extract the event information.', 'role': 'system'}, {'cont...articipants'], 'title': 'CalendarEvent', 'type': 'object'}}, 'type': 'json_schema'}}, 'output': 'valid_calendar_event'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
      },
      "teardown": {
        "duration": 0.0002760365605354309,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
      "lineno": 204,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "math"
      },
      "setup": {
        "duration": 0.07146896701306105,
        "outcome": "passed"
      },
      "call": {
        "duration": 3.9762256713584065,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 223,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 223,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_structured_output[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-math]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e656f0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'math', 'input': {'messages': [{'content': 'You are a helpful math tutor. Guide the user through the solut... ['steps', 'final_answer'], 'title': 'MathReasoning', ...}}, 'type': 'json_schema'}}, 'output': 'valid_math_reasoning'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_chat_structured_output\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_structured_output(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            response_format=case[\"input\"][\"response_format\"],\n            stream=True,\n        )\n        maybe_json_content = \"\"\n        for chunk in response:\n>           maybe_json_content += chunk.choices[0].delta.content or \"\"\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:223: IndexError"
      },
      "teardown": {
        "duration": 0.00030822213739156723,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 226,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07303964532911777,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7525951210409403,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002357764169573784,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 226,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07376459613442421,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.4931257301941514,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002181418240070343,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 226,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07053922209888697,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5402723103761673,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00022673048079013824,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 250,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07116104569286108,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.816182347945869,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0003773067146539688,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 250,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07258457317948341,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.4603788387030363,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 268,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e40610>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127f53190>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003149937838315964,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 250,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07071060407906771,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5184564171358943,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 268,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e9fbe0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_calling(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:268: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127ec75b0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00034826435148715973,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 278,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.08638827316462994,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.9104743050411344,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002754591405391693,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 278,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.0742760868743062,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.462676758877933,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00025860220193862915,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 278,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07174691930413246,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.9501504441723228,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002819998189806938,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 302,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_tool_choice_required[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.0707088652998209,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.5536296227946877,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002662409096956253,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 302,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07114748656749725,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7880472335964441,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 321,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e65210>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"required\",  # Force tool call\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127f2cf70>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00038287416100502014,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 302,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.0964375538751483,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5333329876884818,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 321,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_required[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e27d60>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_required(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"required\",  # Force tool call\n            stream=True,\n        )\n    \n>       _, tool_calls_buffer = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:321: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f31280cdae0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003780834376811981,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 329,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.0716709028929472,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.4488353775814176,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 349,
          "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=10291057599279921000).message"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 349,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f67d00>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_efyus3lwok2l7czire0yxkqm', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=10291057599279921000).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
      },
      "teardown": {
        "duration": 0.00031497515738010406,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 329,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07444119732826948,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.6588975815102458,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 349,
          "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 349,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f91900>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_biqe1tvadyupbqpgarqi91rn', function=Function(arguments='{\"location\":\"San Francisco, USA\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
      },
      "teardown": {
        "duration": 0.000378509983420372,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 329,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07218985445797443,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.46723131835460663,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 349,
          "message": "AssertionError: Expected no tool calls when tool_choice='none'\nassert [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\n +  where [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\n +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 349,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e63100>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        response = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=False,\n        )\n    \n        assert response.choices[0].message.role == \"assistant\"\n>       assert response.choices[0].message.tool_calls is None, \"Expected no tool calls when tool_choice='none'\"\nE       AssertionError: Expected no tool calls when tool_choice='none'\nE       assert [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] is None\nE        +  where [ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]).tool_calls\nE        +    where ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]) = Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_ie8s7wjdypmhv2zzqeqkrif5', function=Function(arguments='{\"location\":\"San Francisco\"}', name='get_weather'), type='function', index=0)]), seed=None).message\n\ntests/verifications/openai_api/test_chat_completion.py:349: AssertionError"
      },
      "teardown": {
        "duration": 0.00030298903584480286,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
      "lineno": 352,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07074183039367199,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5427122255787253,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 376,
          "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 376,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-3.3-70B-Instruct-Turbo-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f312800cdc0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_b3357o23munvhfy8cqg7wwwd', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
      },
      "teardown": {
        "duration": 0.00037453509867191315,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
      "lineno": 352,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.07261296082288027,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.44528466928750277,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 376,
          "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 376,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Scout-17B-16E-Instruct-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e34220>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_00xpv69hsw7zrbkwnbq9ewxw', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
      },
      "teardown": {
        "duration": 0.00035339873284101486,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
      "lineno": 352,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "case0"
      },
      "setup": {
        "duration": 0.0712411506101489,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.792656259611249,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 376,
          "message": "AssertionError: Expected no tool call chunks when tool_choice='none'\nassert not [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\n +  where [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 376,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_tool_choice_none[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-case0]>>\nopenai_client = <openai.OpenAI object at 0x7f3127eb4220>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'input': {'messages': [{'content': 'You are a helpful assistant that can use tools to get information.', 'role': 'sys..., 'properties': {...}, 'required': [...], 'type': 'object'}}, 'type': 'function'}]}, 'output': 'get_weather_tool_call'}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases[\"test_tool_calling\"][\"test_params\"][\"case\"],  # Reusing existing case for now\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_tool_choice_none(request, openai_client, model, provider, verification_config, case):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        stream = openai_client.chat.completions.create(\n            model=model,\n            messages=case[\"input\"][\"messages\"],\n            tools=case[\"input\"][\"tools\"],\n            tool_choice=\"none\",\n            stream=True,\n        )\n    \n        content = \"\"\n        for chunk in stream:\n            delta = chunk.choices[0].delta\n            if delta.content:\n                content += delta.content\n>           assert not delta.tool_calls, \"Expected no tool call chunks when tool_choice='none'\"\nE           AssertionError: Expected no tool call chunks when tool_choice='none'\nE           assert not [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]\nE            +  where [ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')] = ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=[ChoiceDeltaToolCall(index=0, id='call_kiye5b1nrq2b9yf0vy5feki8', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:376: AssertionError"
      },
      "teardown": {
        "duration": 0.0003104889765381813,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
      "lineno": 380,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07114225905388594,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.9180215634405613,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 439,
          "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n +  where 1 = len(([ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\n +    where [ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 439,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127d8feb0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n>           assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 1\nE           assert 1 == 0\nE            +  where 1 = len(([ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]))\nE            +    where [ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)] = ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_2qsbu2opq85hsx2fyb643xix', function=Function(arguments='{\"location\":\"San Francisco, CA\"}', name='get_weather'), type='function', index=0)]).tool_calls\n\ntests/verifications/openai_api/test_chat_completion.py:439: AssertionError"
      },
      "teardown": {
        "duration": 0.00033766962587833405,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.07061670627444983,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.9790025427937508,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002498161047697067,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.07382979057729244,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.8887521298602223,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.000309688039124012,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.07273934967815876,
        "outcome": "passed"
      },
      "call": {
        "duration": 4.823556405492127,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023336336016654968,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.07417754456400871,
        "outcome": "passed"
      },
      "call": {
        "duration": 15.95331169757992,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023916177451610565,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
      "lineno": 380,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07702007982879877,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.735025598667562,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 467,
          "message": "AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nassert False\n +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f3127d3f610>)"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 467,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127de4c70>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call.id,\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n                assert assistant_message.content is not None, \"Expected content, but none received.\"\n                expected_answers = expected[\"answer\"]  # This is now a list\n                content_lower = assistant_message.content.lower()\n>               assert any(ans.lower() in content_lower for ans in expected_answers), (\n                    f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n                )\nE               AssertionError: Expected one of ['sol'] in content, but got: 'I cannot perform this task as it requires additional functionality that is not available in the given functions.'\nE               assert False\nE                +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f3127d3f610>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
      },
      "teardown": {
        "duration": 0.00030991993844509125,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.07215368840843439,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.01631036773324,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002633333206176758,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.0734679875895381,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.122296486981213,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0003703571856021881,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.07337972242385149,
        "outcome": "passed"
      },
      "call": {
        "duration": 5.233728421851993,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00024013593792915344,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.07165702432394028,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.579101173207164,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00022159796208143234,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
      "lineno": 380,
      "outcome": "failed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07499713078141212,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.345451476983726,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 467,
          "message": "AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nassert False\n +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f3127fc2ab0>)"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 467,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f2f370>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_non_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\"\n        Test cases for multi-turn tool calling.\n        Tool calls are asserted.\n        Tool responses are provided in the test case.\n        Final response is asserted.\n        \"\"\"\n    \n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        # Create a copy of the messages list to avoid modifying the original\n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        # Use deepcopy to prevent modification across runs/parametrization\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        # keep going until either\n        # 1. we have messages to test in multi-turn\n        # 2. no messages but last message is tool response\n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            # do not take new messages if last message is tool response\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                # Ensure new_messages is a list of message objects\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    # If it's a single message object, add it directly\n                    messages.append(new_messages)\n    \n            # --- API Call ---\n            response = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=False,\n            )\n    \n            # --- Process Response ---\n            assistant_message = response.choices[0].message\n            messages.append(assistant_message.model_dump(exclude_unset=True))\n    \n            assert assistant_message.role == \"assistant\"\n    \n            # Get the expected result data\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            # --- Assertions based on expected result ---\n            assert len(assistant_message.tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(assistant_message.tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                tool_call = assistant_message.tool_calls[0]\n                assert tool_call.function.name == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call.function.name}'\"\n                )\n                # Parse the JSON string arguments before comparing\n                actual_arguments = json.loads(tool_call.function.arguments)\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call.id,\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n                assert assistant_message.content is not None, \"Expected content, but none received.\"\n                expected_answers = expected[\"answer\"]  # This is now a list\n                content_lower = assistant_message.content.lower()\n>               assert any(ans.lower() in content_lower for ans in expected_answers), (\n                    f\"Expected one of {expected_answers} in content, but got: '{assistant_message.content}'\"\n                )\nE               AssertionError: Expected one of ['sol'] in content, but got: '{\"name\": null, \"parameters\": null}'\nE               assert False\nE                +  where False = any(<generator object test_chat_non_streaming_multi_turn_tool_calling.<locals>.<genexpr> at 0x7f3127fc2ab0>)\n\ntests/verifications/openai_api/test_chat_completion.py:467: AssertionError"
      },
      "teardown": {
        "duration": 0.0003060000017285347,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.07219678908586502,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.1842003548517823,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023157335817813873,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.07263681385666132,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.064652710221708,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002187909558415413,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.07442002464085817,
        "outcome": "passed"
      },
      "call": {
        "duration": 4.3001746302470565,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.0002673650160431862,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
      "lineno": 380,
      "outcome": "passed",
      "keywords": [
        "test_chat_non_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.0752437636256218,
        "outcome": "passed"
      },
      "call": {
        "duration": 3.1100223967805505,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00023829564452171326,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07147279102355242,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.7697121743112803,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 521,
          "message": "AssertionError: Expected 0 tool calls, but got 1\nassert 1 == 0\n +  where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_kpb5gnyu9klkx2yzft43dxez', 'type': 'function'}]))"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 521,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127dae0b0>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n>           assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\nE           AssertionError: Expected 0 tool calls, but got 1\nE           assert 1 == 0\nE            +  where 1 = len(([{'function': {'arguments': '{\"location\":\"San Francisco, CA\"}', 'name': 'get_weather'}, 'id': 'call_kpb5gnyu9klkx2yzft43dxez', 'type': 'function'}]))\n\ntests/verifications/openai_api/test_chat_completion.py:521: AssertionError"
      },
      "teardown": {
        "duration": 0.0003948565572500229,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.0739708598703146,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.8291563373059034,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 547,
          "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 547,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f3b220>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
      },
      "teardown": {
        "duration": 0.0003177514299750328,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
      "lineno": 471,
      "outcome": "passed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.07326000090688467,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.20385564584285,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00035414285957813263,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.10739517118781805,
        "outcome": "passed"
      },
      "call": {
        "duration": 3.1176233775913715,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 547,
          "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 547,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e2b580>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
      },
      "teardown": {
        "duration": 0.0003009289503097534,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.07599783595651388,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.0640214709565043,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 547,
          "message": "AssertionError: Expected content, but none received.\nassert ('' is not None and '' != '')"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 547,
            "message": "AssertionError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-3.3-70B-Instruct-Turbo-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f12a10>\nmodel = 'meta-llama/Llama-3.3-70B-Instruct-Turbo', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n            accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n    \n            # --- Construct Assistant Message for History ---\n            assistant_message_dict = {\"role\": \"assistant\"}\n            if accumulated_content:\n                assistant_message_dict[\"content\"] = accumulated_content\n            if accumulated_tool_calls:\n                assistant_message_dict[\"tool_calls\"] = accumulated_tool_calls\n    \n            messages.append(assistant_message_dict)\n    \n            # --- Assertions ---\n            expected = expected_results.pop(0)\n            num_tool_calls = expected[\"num_tool_calls\"]\n    \n            assert len(accumulated_tool_calls or []) == num_tool_calls, (\n                f\"Expected {num_tool_calls} tool calls, but got {len(accumulated_tool_calls or [])}\"\n            )\n    \n            if num_tool_calls > 0:\n                # Use the first accumulated tool call for assertion\n                tool_call = accumulated_tool_calls[0]\n                assert tool_call[\"function\"][\"name\"] == expected[\"tool_name\"], (\n                    f\"Expected tool '{expected['tool_name']}', got '{tool_call['function']['name']}'\"\n                )\n                # Parse the accumulated arguments string for comparison\n                actual_arguments = json.loads(tool_call[\"function\"][\"arguments\"])\n                assert actual_arguments == expected[\"tool_arguments\"], (\n                    f\"Expected arguments '{expected['tool_arguments']}', got '{actual_arguments}'\"\n                )\n    \n                # Prepare and append the tool response for the next turn\n                tool_response = tool_responses.pop(0)\n                messages.append(\n                    {\n                        \"role\": \"tool\",\n                        \"tool_call_id\": tool_call[\"id\"],\n                        \"content\": tool_response[\"response\"],\n                    }\n                )\n            else:\n>               assert accumulated_content is not None and accumulated_content != \"\", \"Expected content, but none received.\"\nE               AssertionError: Expected content, but none received.\nE               assert ('' is not None and '' != '')\n\ntests/verifications/openai_api/test_chat_completion.py:547: AssertionError"
      },
      "teardown": {
        "duration": 0.00029294565320014954,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07202461268752813,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.6093930723145604,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127dad4e0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127e18760>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00030322466045618057,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.06955079920589924,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.4394088052213192,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f312800e5c0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127e351e0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00038521457463502884,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.07154521346092224,
        "outcome": "passed"
      },
      "call": {
        "duration": 4.631643522530794,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e1a8c0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127de5660>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003909459337592125,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.07234212290495634,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5346909575164318,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127dbe8c0>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127d717e0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00038490165024995804,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.0718430494889617,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.57699906360358,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Scout-17B-16E-Instruct-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f2e350>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127ed0ac0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003002053126692772,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "text_then_weather_tool"
      },
      "setup": {
        "duration": 0.07102924212813377,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.3103123838081956,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-text_then_weather_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127e64f40>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'text_then_weather_tool', 'expected': [{'answer': ['sol'], 'num_tool_calls': 0}, {'num_tool_calls': 1, 'to...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127f65d20>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.000398833304643631,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "weather_tool_then_text"
      },
      "setup": {
        "duration": 0.07346561551094055,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.515005356632173,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-weather_tool_then_text]>>\nopenai_client = <openai.OpenAI object at 0x7f3127de4f40>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'weather_tool_then_text', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'location': 'San Francisco...], 'type': 'object'}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': '70 degrees and foggy'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127df6d10>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003784680739045143,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "add_product_tool"
      },
      "setup": {
        "duration": 0.07150219846516848,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.770132390782237,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-add_product_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127df7070>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'add_product_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'inStock': True, 'name': 'Widget...}}, 'type': 'function'}]}, 'tool_responses': [{'response': \"{'response': 'Successfully added product with id: 123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127eb7460>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00032351352274417877,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "get_then_create_event_tool"
      },
      "setup": {
        "duration": 0.07132976595312357,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.5259293485432863,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-get_then_create_event_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127ed1a20>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'get_then_create_event_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'date': '2025-03-03', ...ents found for 2025-03-03 at 10:00'}\"}, {'response': \"{'response': 'Successfully created new event with id: e_123'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f31280cc0a0>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.00037543755024671555,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
      "lineno": 471,
      "outcome": "failed",
      "keywords": [
        "test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "compare_monthly_expense_tool"
      },
      "setup": {
        "duration": 0.07170393783599138,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.4805993800982833,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 688,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 506,
            "message": ""
          },
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 688,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_streaming_multi_turn_tool_calling[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-compare_monthly_expense_tool]>>\nopenai_client = <openai.OpenAI object at 0x7f3127eb60e0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\ncase = {'case_id': 'compare_monthly_expense_tool', 'expected': [{'num_tool_calls': 1, 'tool_arguments': {'month': 1, 'year': ... 'Total expenses for January 2025: $1000'}\"}, {'response': \"{'response': 'Total expenses for February 2024: $2000'}\"}]}\n\n    @pytest.mark.parametrize(\n        \"case\",\n        chat_completion_test_cases.get(\"test_chat_multi_turn_tool_calling\", {}).get(\"test_params\", {}).get(\"case\", []),\n        ids=case_id_generator,\n    )\n    def test_chat_streaming_multi_turn_tool_calling(request, openai_client, model, provider, verification_config, case):\n        \"\"\" \"\"\"\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages = []\n        tools = case[\"input\"][\"tools\"]\n        expected_results = copy.deepcopy(case[\"expected\"])\n        tool_responses = copy.deepcopy(case.get(\"tool_responses\", []))\n        input_messages_turns = copy.deepcopy(case[\"input\"][\"messages\"])\n    \n        while len(input_messages_turns) > 0 or (len(messages) > 0 and messages[-1][\"role\"] == \"tool\"):\n            if len(messages) == 0 or messages[-1][\"role\"] != \"tool\":\n                new_messages = input_messages_turns.pop(0)\n                if isinstance(new_messages, list):\n                    messages.extend(new_messages)\n                else:\n                    messages.append(new_messages)\n    \n            # --- API Call (Streaming) ---\n            stream = openai_client.chat.completions.create(\n                model=model,\n                messages=messages,\n                tools=tools,\n                stream=True,\n            )\n    \n            # --- Process Stream ---\n>           accumulated_content, accumulated_tool_calls = _accumulate_streaming_tool_calls(stream)\n\ntests/verifications/openai_api/test_chat_completion.py:506: \n_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n\nstream = <openai.Stream object at 0x7f3127e59f00>\n\n    def _accumulate_streaming_tool_calls(stream):\n        \"\"\"Accumulates tool calls and content from a streaming ChatCompletion response.\"\"\"\n        tool_calls_buffer = {}\n        current_id = None\n        full_content = \"\"  # Initialize content accumulator\n        # Process streaming chunks\n        for chunk in stream:\n>           choice = chunk.choices[0]\nE           IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:688: IndexError"
      },
      "teardown": {
        "duration": 0.0003568241372704506,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
      "lineno": 554,
      "outcome": "skipped",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=False",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "stream=False"
      },
      "setup": {
        "duration": 0.07332183048129082,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.00027661025524139404,
        "outcome": "skipped",
        "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
      },
      "teardown": {
        "duration": 0.00043700821697711945,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
      "lineno": 554,
      "outcome": "skipped",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-3.3-70B-Instruct-Turbo-stream=True",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
        "case_id": "stream=True"
      },
      "setup": {
        "duration": 0.07106236275285482,
        "outcome": "passed"
      },
      "call": {
        "duration": 0.0002557719126343727,
        "outcome": "skipped",
        "longrepr": "('/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py', 561, 'Skipped: Skipping test_chat_multi_turn_multiple_images for model meta-llama/Llama-3.3-70B-Instruct-Turbo on provider together based on config.')"
      },
      "teardown": {
        "duration": 0.00020366813987493515,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
      "lineno": 554,
      "outcome": "passed",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=False",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "stream=False"
      },
      "setup": {
        "duration": 0.07133481372147799,
        "outcome": "passed"
      },
      "call": {
        "duration": 2.875141463242471,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00029349327087402344,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
      "lineno": 554,
      "outcome": "failed",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
        "case_id": "stream=True"
      },
      "setup": {
        "duration": 0.07148486748337746,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.4055311204865575,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 596,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 596,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Scout-17B-16E-Instruct-stream=True]>>\nopenai_client = <openai.OpenAI object at 0x7f3127f67a00>\nmodel = 'meta-llama/Llama-4-Scout-17B-16E-Instruct', provider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n    @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n    def test_chat_multi_turn_multiple_images(\n        request, openai_client, model, provider, verification_config, multi_image_data, stream\n    ):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages_turn1 = [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[0],\n                        },\n                    },\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[1],\n                        },\n                    },\n                    {\n                        \"type\": \"text\",\n                        \"text\": \"What furniture is in the first image that is not in the second image?\",\n                    },\n                ],\n            },\n        ]\n    \n        # First API call\n        response1 = openai_client.chat.completions.create(\n            model=model,\n            messages=messages_turn1,\n            stream=stream,\n        )\n        if stream:\n            message_content1 = \"\"\n            for chunk in response1:\n>               message_content1 += chunk.choices[0].delta.content or \"\"\nE               IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError"
      },
      "teardown": {
        "duration": 0.00031107570976018906,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
      "lineno": 554,
      "outcome": "passed",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=False",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "stream=False"
      },
      "setup": {
        "duration": 0.07265154179185629,
        "outcome": "passed"
      },
      "call": {
        "duration": 6.755587887018919,
        "outcome": "passed"
      },
      "teardown": {
        "duration": 0.00022470485419034958,
        "outcome": "passed"
      }
    },
    {
      "nodeid": "tests/verifications/openai_api/test_chat_completion.py::test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
      "lineno": 554,
      "outcome": "failed",
      "keywords": [
        "test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]",
        "parametrize",
        "pytestmark",
        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True",
        "test_chat_completion.py",
        "openai_api",
        "verifications",
        "tests",
        "llama-stack",
        ""
      ],
      "metadata": {
        "model": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        "case_id": "stream=True"
      },
      "setup": {
        "duration": 0.07265954371541739,
        "outcome": "passed"
      },
      "call": {
        "duration": 1.973216057755053,
        "outcome": "failed",
        "crash": {
          "path": "/home/erichuang/llama-stack/tests/verifications/openai_api/test_chat_completion.py",
          "lineno": 596,
          "message": "IndexError: list index out of range"
        },
        "traceback": [
          {
            "path": "tests/verifications/openai_api/test_chat_completion.py",
            "lineno": 596,
            "message": "IndexError"
          }
        ],
        "longrepr": "request = <FixtureRequest for <Function test_chat_multi_turn_multiple_images[meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8-stream=True]>>\nopenai_client = <openai.OpenAI object at 0x7f3127cd28c0>\nmodel = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'\nprovider = 'together'\nverification_config = {'providers': {'cerebras': {'api_key_var': 'CEREBRAS_API_KEY', 'base_url': 'https://api.cerebras.ai/v1', 'model_displa...-versatile', 'meta-llama/llama-4-scout-17b-16e-instruct', 'meta-llama/llama-4-maverick-17b-128e-instruct'], ...}, ...}}\nmulti_image_data = ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGC...6pH9jaTzNv7vfRRXzubfxj9f8Pv8AkTz/AMX/ALbEz5Ly38lfMk/5Z/u64PxhqEZh+z/6rzvn2UUV5EvgPuzy/wAc6p5dt5ccibJpNkkdFFFec27mZ//Z']\nstream = True\n\n    @pytest.mark.parametrize(\"stream\", [False, True], ids=[\"stream=False\", \"stream=True\"])\n    def test_chat_multi_turn_multiple_images(\n        request, openai_client, model, provider, verification_config, multi_image_data, stream\n    ):\n        test_name_base = get_base_test_name(request)\n        if should_skip_test(verification_config, provider, model, test_name_base):\n            pytest.skip(f\"Skipping {test_name_base} for model {model} on provider {provider} based on config.\")\n    \n        messages_turn1 = [\n            {\n                \"role\": \"user\",\n                \"content\": [\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[0],\n                        },\n                    },\n                    {\n                        \"type\": \"image_url\",\n                        \"image_url\": {\n                            \"url\": multi_image_data[1],\n                        },\n                    },\n                    {\n                        \"type\": \"text\",\n                        \"text\": \"What furniture is in the first image that is not in the second image?\",\n                    },\n                ],\n            },\n        ]\n    \n        # First API call\n        response1 = openai_client.chat.completions.create(\n            model=model,\n            messages=messages_turn1,\n            stream=stream,\n        )\n        if stream:\n            message_content1 = \"\"\n            for chunk in response1:\n>               message_content1 += chunk.choices[0].delta.content or \"\"\nE               IndexError: list index out of range\n\ntests/verifications/openai_api/test_chat_completion.py:596: IndexError"
      },
      "teardown": {
        "duration": 0.0017197001725435257,
        "outcome": "passed"
      }
    }
  ],
  "run_timestamp": 1744915514
}