From 26dffff92a5bb09df1be620ae1216ac7089615fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 14 May 2025 07:40:15 +0200 Subject: [PATCH 1/5] chore: remove pytest reports (#2156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Cleanup old test code too. Signed-off-by: Sébastien Han --- llama_stack/providers/tests/__init__.py | 5 - .../providers/tests/ci_test_config.yaml | 55 ---- llama_stack/providers/tests/conftest.py | 296 ------------------ llama_stack/providers/tests/report.py | 176 ----------- llama_stack/templates/cerebras/report.md | 43 --- llama_stack/templates/fireworks/report.md | 45 --- llama_stack/templates/ollama/report.md | 43 --- llama_stack/templates/tgi/report.md | 44 --- llama_stack/templates/together/report.md | 45 --- tests/integration/README.md | 1 - tests/integration/conftest.py | 5 - tests/integration/metadata.py | 54 ---- tests/integration/report.py | 220 ------------- 13 files changed, 1032 deletions(-) delete mode 100644 llama_stack/providers/tests/__init__.py delete mode 100644 llama_stack/providers/tests/ci_test_config.yaml delete mode 100644 llama_stack/providers/tests/conftest.py delete mode 100644 llama_stack/providers/tests/report.py delete mode 100644 llama_stack/templates/cerebras/report.md delete mode 100644 llama_stack/templates/fireworks/report.md delete mode 100644 llama_stack/templates/ollama/report.md delete mode 100644 llama_stack/templates/tgi/report.md delete mode 100644 llama_stack/templates/together/report.md delete mode 100644 tests/integration/metadata.py delete mode 100644 tests/integration/report.py diff --git a/llama_stack/providers/tests/__init__.py b/llama_stack/providers/tests/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/llama_stack/providers/tests/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
diff --git a/llama_stack/providers/tests/ci_test_config.yaml b/llama_stack/providers/tests/ci_test_config.yaml deleted file mode 100644 index 3edcd38bf..000000000 --- a/llama_stack/providers/tests/ci_test_config.yaml +++ /dev/null @@ -1,55 +0,0 @@ -inference: - tests: - - inference/test_vision_inference.py::test_vision_chat_completion_streaming - - inference/test_vision_inference.py::test_vision_chat_completion_non_streaming - - inference/test_text_inference.py::test_structured_output - - inference/test_text_inference.py::test_chat_completion_streaming - - inference/test_text_inference.py::test_chat_completion_non_streaming - - inference/test_text_inference.py::test_chat_completion_with_tool_calling - - inference/test_text_inference.py::test_chat_completion_with_tool_calling_streaming - - scenarios: - - provider_fixtures: - inference: ollama - - fixture_combo_id: fireworks - - provider_fixtures: - inference: together - # - inference: tgi - # - inference: vllm_remote - - inference_models: - - meta-llama/Llama-3.1-8B-Instruct - - meta-llama/Llama-3.2-11B-Vision-Instruct - - -agents: - tests: - - agents/test_agents.py::test_agent_turns_with_safety - - agents/test_agents.py::test_rag_agent - - scenarios: - - fixture_combo_id: ollama - - fixture_combo_id: together - - fixture_combo_id: fireworks - - inference_models: - - meta-llama/Llama-3.2-1B-Instruct - - safety_shield: meta-llama/Llama-Guard-3-1B - - -memory: - tests: - - memory/test_memory.py::test_query_documents - - scenarios: - - fixture_combo_id: ollama - - provider_fixtures: - inference: sentence_transformers - memory: faiss - - fixture_combo_id: chroma - - inference_models: - - meta-llama/Llama-3.2-1B-Instruct - - embedding_model: all-MiniLM-L6-v2 diff --git a/llama_stack/providers/tests/conftest.py b/llama_stack/providers/tests/conftest.py deleted file mode 100644 index cd86af0d6..000000000 --- a/llama_stack/providers/tests/conftest.py +++ /dev/null @@ -1,296 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import os -from collections import defaultdict -from pathlib import Path -from typing import Any - -import pytest -import yaml -from dotenv import load_dotenv -from pydantic import BaseModel, Field -from termcolor import colored - -from llama_stack.distribution.datatypes import Provider -from llama_stack.providers.datatypes import RemoteProviderConfig - -from .env import get_env_or_fail -from .report import Report - - -class ProviderFixture(BaseModel): - providers: list[Provider] - provider_data: dict[str, Any] | None = None - - -class TestScenario(BaseModel): - # provider fixtures can be either a mark or a dictionary of api -> providers - provider_fixtures: dict[str, str] = Field(default_factory=dict) - fixture_combo_id: str | None = None - - -class APITestConfig(BaseModel): - scenarios: list[TestScenario] = Field(default_factory=list) - inference_models: list[str] = Field(default_factory=list) - - # test name format should be :: - tests: list[str] = Field(default_factory=list) - - -class MemoryApiTestConfig(APITestConfig): - embedding_model: str | None = Field(default_factory=None) - - -class AgentsApiTestConfig(APITestConfig): - safety_shield: str | None = Field(default_factory=None) - - -class TestConfig(BaseModel): - inference: APITestConfig | None = None - agents: AgentsApiTestConfig | None = None - memory: MemoryApiTestConfig | None = None - - -def get_test_config_from_config_file(metafunc_config): - config_file = metafunc_config.getoption("--config") - if config_file is None: - return None - - config_file_path = Path(__file__).parent / config_file - if not config_file_path.exists(): - raise ValueError( - f"Test config {config_file} was specified but not found. Please make sure it exists in the llama_stack/providers/tests directory." - ) - with open(config_file_path) as config_file: - config = yaml.safe_load(config_file) - return TestConfig(**config) - - -def get_test_config_for_api(metafunc_config, api): - test_config = get_test_config_from_config_file(metafunc_config) - if test_config is None: - return None - return getattr(test_config, api) - - -def get_provider_fixture_overrides_from_test_config(metafunc_config, api, default_provider_fixture_combinations): - api_config = get_test_config_for_api(metafunc_config, api) - if api_config is None: - return None - - fixture_combo_ids = set() - custom_provider_fixture_combos = [] - for scenario in api_config.scenarios: - if scenario.fixture_combo_id: - fixture_combo_ids.add(scenario.fixture_combo_id) - else: - custom_provider_fixture_combos.append( - pytest.param( - scenario.provider_fixtures, - id=scenario.provider_fixtures.get("inference") or "", - ) - ) - - if len(fixture_combo_ids) > 0: - for default_fixture in default_provider_fixture_combinations: - if default_fixture.id in fixture_combo_ids: - custom_provider_fixture_combos.append(default_fixture) - return custom_provider_fixture_combos - - -def remote_stack_fixture() -> ProviderFixture: - if url := os.getenv("REMOTE_STACK_URL", None): - config = RemoteProviderConfig.from_url(url) - else: - config = RemoteProviderConfig( - host=get_env_or_fail("REMOTE_STACK_HOST"), - port=int(get_env_or_fail("REMOTE_STACK_PORT")), - ) - return ProviderFixture( - providers=[ - Provider( - provider_id="test::remote", - provider_type="test::remote", - config=config.model_dump(), - ) - ], - ) - - -def pytest_configure(config): - config.option.tbstyle = "short" - config.option.disable_warnings = True - - """Load environment variables at start of test run""" - # Load from .env file if it exists - 
env_file = Path(__file__).parent / ".env" - if env_file.exists(): - load_dotenv(env_file) - - # Load any environment variables passed via --env - env_vars = config.getoption("--env") or [] - for env_var in env_vars: - key, value = env_var.split("=", 1) - os.environ[key] = value - - if config.getoption("--output") is not None: - config.pluginmanager.register(Report(config.getoption("--output"))) - - -def pytest_addoption(parser): - parser.addoption( - "--providers", - default="", - help=( - "Provider configuration in format: api1=provider1,api2=provider2. " - "Example: --providers inference=ollama,safety=meta-reference" - ), - ) - parser.addoption( - "--config", - action="store", - help="Set test config file (supported format: YAML), e.g. --config=test_config.yml", - ) - parser.addoption( - "--output", - action="store", - help="Set output file for test report, e.g. --output=pytest_report.md", - ) - """Add custom command line options""" - parser.addoption("--env", action="append", help="Set environment variables, e.g. --env KEY=value") - parser.addoption( - "--inference-model", - action="store", - default="meta-llama/Llama-3.2-3B-Instruct", - help="Specify the inference model to use for testing", - ) - parser.addoption( - "--safety-shield", - action="store", - default="meta-llama/Llama-Guard-3-1B", - help="Specify the safety shield to use for testing", - ) - parser.addoption( - "--embedding-model", - action="store", - default=None, - help="Specify the embedding model to use for testing", - ) - parser.addoption( - "--judge-model", - action="store", - default="meta-llama/Llama-3.1-8B-Instruct", - help="Specify the judge model to use for testing", - ) - - -def make_provider_id(providers: dict[str, str]) -> str: - return ":".join(f"{api}={provider}" for api, provider in sorted(providers.items())) - - -def get_provider_marks(providers: dict[str, str]) -> list[Any]: - marks = [] - for provider in providers.values(): - marks.append(getattr(pytest.mark, provider)) - return marks - - -def get_provider_fixture_overrides(config, available_fixtures: dict[str, list[str]]) -> list[pytest.param] | None: - provider_str = config.getoption("--providers") - if not provider_str: - return None - - fixture_dict = parse_fixture_string(provider_str, available_fixtures) - return [ - pytest.param( - fixture_dict, - id=make_provider_id(fixture_dict), - marks=get_provider_marks(fixture_dict), - ) - ] - - -def parse_fixture_string(provider_str: str, available_fixtures: dict[str, list[str]]) -> dict[str, str]: - """Parse provider string of format 'api1=provider1,api2=provider2'""" - if not provider_str: - return {} - - fixtures = {} - pairs = provider_str.split(",") - for pair in pairs: - if "=" not in pair: - raise ValueError(f"Invalid provider specification: {pair}. Expected format: api=provider") - api, fixture = pair.split("=") - if api not in available_fixtures: - raise ValueError(f"Unknown API: {api}. Available APIs: {list(available_fixtures.keys())}") - if fixture not in available_fixtures[api]: - raise ValueError( - f"Unknown provider '{fixture}' for API '{api}'. Available providers: {list(available_fixtures[api])}" - ) - fixtures[api] = fixture - - # Check that all provided APIs are supported - for api in available_fixtures.keys(): - if api not in fixtures: - raise ValueError( - f"Missing provider fixture for API '{api}'. 
Available providers: {list(available_fixtures[api])}" - ) - return fixtures - - -def pytest_itemcollected(item): - # Get all markers as a list - filtered = ("asyncio", "parametrize") - marks = [mark.name for mark in item.iter_markers() if mark.name not in filtered] - if marks: - marks = colored(",".join(marks), "yellow") - item.name = f"{item.name}[{marks}]" - - -def pytest_collection_modifyitems(session, config, items): - test_config = get_test_config_from_config_file(config) - if test_config is None: - return - - required_tests = defaultdict(set) - for api_test_config in [ - test_config.inference, - test_config.memory, - test_config.agents, - ]: - if api_test_config is None: - continue - for test in api_test_config.tests: - arr = test.split("::") - if len(arr) != 2: - raise ValueError(f"Invalid format for test name {test}") - test_path, func_name = arr - required_tests[Path(__file__).parent / test_path].add(func_name) - - new_items, deselected_items = [], [] - for item in items: - func_name = getattr(item, "originalname", item.name) - if func_name in required_tests[item.fspath]: - new_items.append(item) - continue - deselected_items.append(item) - - items[:] = new_items - config.hook.pytest_deselected(items=deselected_items) - - -pytest_plugins = [ - "llama_stack.providers.tests.inference.fixtures", - "llama_stack.providers.tests.safety.fixtures", - "llama_stack.providers.tests.vector_io.fixtures", - "llama_stack.providers.tests.agents.fixtures", - "llama_stack.providers.tests.datasetio.fixtures", - "llama_stack.providers.tests.scoring.fixtures", - "llama_stack.providers.tests.eval.fixtures", - "llama_stack.providers.tests.post_training.fixtures", - "llama_stack.providers.tests.tools.fixtures", -] diff --git a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py deleted file mode 100644 index bc29534be..000000000 --- a/llama_stack/providers/tests/report.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- - -from collections import defaultdict -from pathlib import Path - -import pytest -from pytest import ExitCode -from pytest_html.basereport import _process_outcome - -from llama_stack.models.llama.sku_list import all_registered_models -from llama_stack.models.llama.sku_types import CoreModelId - -INFERENCE_APIS = ["chat_completion"] -FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"] -SUPPORTED_MODELS = { - "ollama": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - }, - "fireworks": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - }, - "together": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - }, -} - - -class Report: - def __init__(self, output_path): - valid_file_format = ( - output_path.split(".")[1] in ["md", "markdown"] if len(output_path.split(".")) == 2 else False - ) - if not valid_file_format: - raise ValueError(f"Invalid output file {output_path}. 
Markdown file is required") - self.output_path = output_path - self.test_data = defaultdict(dict) - self.inference_tests = defaultdict(dict) - - @pytest.hookimpl - def pytest_runtest_logreport(self, report): - # This hook is called in several phases, including setup, call and teardown - # The test is considered failed / error if any of the outcomes is not "Passed" - outcome = _process_outcome(report) - data = { - "outcome": report.outcome, - "longrepr": report.longrepr, - "name": report.nodeid, - } - if report.nodeid not in self.test_data: - self.test_data[report.nodeid] = data - elif self.test_data[report.nodeid] != outcome and outcome != "Passed": - self.test_data[report.nodeid] = data - - @pytest.hookimpl - def pytest_sessionfinish(self, session, exitstatus): - if exitstatus <= ExitCode.INTERRUPTED: - return - report = [] - report.append("# Llama Stack Integration Test Results Report") - report.append("\n## Summary") - report.append("\n## Supported Models: ") - - header = "| Model Descriptor |" - dividor = "|:---|" - for k in SUPPORTED_MODELS.keys(): - header += f"{k} |" - dividor += ":---:|" - - report.append(header) - report.append(dividor) - - rows = [] - for model in all_registered_models(): - if "Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value: - continue - row = f"| {model.core_model_id.value} |" - for k in SUPPORTED_MODELS.keys(): - if model.core_model_id.value in SUPPORTED_MODELS[k]: - row += " ✅ |" - else: - row += " ❌ |" - rows.append(row) - report.extend(rows) - - report.append("\n### Tests:") - - for provider in SUPPORTED_MODELS.keys(): - if provider not in self.inference_tests: - continue - report.append(f"\n #### {provider}") - test_table = [ - "| Area | Model | API | Functionality Test | Status |", - "|:-----|:-----|:-----|:-----|:-----|", - ] - for api in INFERENCE_APIS: - tests = self.inference_tests[provider][api] - for test_nodeid in tests: - row = "|{area} | {model} | {api} | {test} | {result} ".format( - area="Text" if "text" in test_nodeid else "Vision", - model=("Llama-3.1-8B-Instruct" if "text" in test_nodeid else "Llama3.2-11B-Vision-Instruct"), - api=f"/{api}", - test=self.get_simple_function_name(test_nodeid), - result=("✅" if self.test_data[test_nodeid]["outcome"] == "passed" else "❌"), - ) - test_table += [row] - report.extend(test_table) - report.append("\n") - - output_file = Path(self.output_path) - output_file.write_text("\n".join(report)) - print(f"\n Report generated: {output_file.absolute()}") - - @pytest.hookimpl(trylast=True) - def pytest_collection_modifyitems(self, session, config, items): - for item in items: - inference = item.callspec.params.get("inference_stack") - if "inference" in item.nodeid: - func_name = getattr(item, "originalname", item.name) - for api in INFERENCE_APIS: - if api in func_name: - api_tests = self.inference_tests[inference].get(api, set()) - api_tests.add(item.nodeid) - self.inference_tests[inference][api] = api_tests - - def get_simple_function_name(self, nodeid): - """Extract function name from nodeid. 
- - Examples: - - 'tests/test_math.py::test_addition' -> 'test_addition' - - 'tests/test_math.py::TestClass::test_method' -> test_method' - """ - parts = nodeid.split("::") - func_name = nodeid # Fallback to full nodeid if pattern doesn't match - if len(parts) == 2: # Simple function - func_name = parts[1] - elif len(parts) == 3: # Class method - func_name = parts[2] - return func_name.split("[")[0] diff --git a/llama_stack/templates/cerebras/report.md b/llama_stack/templates/cerebras/report.md deleted file mode 100644 index f240e354b..000000000 --- a/llama_stack/templates/cerebras/report.md +++ /dev/null @@ -1,43 +0,0 @@ -# Report for cerebras distribution - -## Supported Models -| Model Descriptor | cerebras | -|:---|:---| -| meta-llama/Llama-3-8B-Instruct | ❌ | -| meta-llama/Llama-3-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-8B-Instruct | ✅ | -| meta-llama/Llama-3.1-70B-Instruct | ❌ | -| meta-llama/Llama-3.1-405B-Instruct-FP8 | ❌ | -| meta-llama/Llama-3.2-1B-Instruct | ❌ | -| meta-llama/Llama-3.2-3B-Instruct | ❌ | -| meta-llama/Llama-3.2-11B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.2-90B-Vision-Instruct | ❌ | -| meta-llama/Llama-3.3-70B-Instruct | ✅ | -| meta-llama/Llama-Guard-3-11B-Vision | ❌ | -| meta-llama/Llama-Guard-3-1B | ❌ | -| meta-llama/Llama-Guard-3-8B | ❌ | -| meta-llama/Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ❌ | - -## Vector IO -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ❌ | diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md deleted file mode 100644 index b520acf8e..000000000 --- a/llama_stack/templates/fireworks/report.md +++ /dev/null @@ -1,45 +0,0 @@ -# Report for fireworks distribution - -## Supported Models -| Model Descriptor | fireworks | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ✅ | -| Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | 
Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ | -| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ | diff --git a/llama_stack/templates/ollama/report.md b/llama_stack/templates/ollama/report.md deleted file mode 100644 index 4b2dada3a..000000000 --- a/llama_stack/templates/ollama/report.md +++ /dev/null @@ -1,43 +0,0 @@ -# Report for ollama distribution - -## Supported Models -| Model Descriptor | ollama | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ❌ | -| Llama-Guard-3-1B | ✅ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| API | 
Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ✅ | diff --git a/llama_stack/templates/tgi/report.md b/llama_stack/templates/tgi/report.md deleted file mode 100644 index b0f5d88a2..000000000 --- a/llama_stack/templates/tgi/report.md +++ /dev/null @@ -1,44 +0,0 @@ -# Report for tgi distribution - -## Supported Models -| Model Descriptor | tgi | -|:---|:---| -| Llama-3-8B-Instruct | ✅ | -| Llama-3-70B-Instruct | ✅ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ✅ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ✅ | -| Llama-Guard-3-1B | ✅ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ✅ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | streaming | test_image_chat_completion_streaming | ❌ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ❌ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| API | Capability | Test | Status | -|:-----|:-----|:-----|:-----| -| /create_agent_turn | rag | test_rag_agent | ✅ | -| /create_agent_turn | custom_tool | test_custom_tool | ✅ | -| /create_agent_turn | code_execution | test_code_interpreter_for_attachments | ✅ | diff --git a/llama_stack/templates/together/report.md b/llama_stack/templates/together/report.md deleted file mode 100644 index 71ae83597..000000000 --- a/llama_stack/templates/together/report.md +++ /dev/null @@ -1,45 +0,0 @@ -# Report for together distribution - -## Supported Models -| Model Descriptor | together | -|:---|:---| -| Llama-3-8B-Instruct | ❌ | -| Llama-3-70B-Instruct | ❌ | -| Llama3.1-8B-Instruct | ✅ | -| Llama3.1-70B-Instruct | ✅ | -| Llama3.1-405B-Instruct | ✅ | -| Llama3.2-1B-Instruct | ❌ | -| Llama3.2-3B-Instruct | ✅ | -| Llama3.2-11B-Vision-Instruct | ✅ | -| Llama3.2-90B-Vision-Instruct | ✅ | -| Llama3.3-70B-Instruct | ✅ | -| Llama-Guard-3-11B-Vision | ✅ | -| Llama-Guard-3-1B | ❌ | -| Llama-Guard-3-8B | ✅ | -| Llama-Guard-2-8B | ❌ | - -## Inference -| Model | API | Capability | Test | Status | -|:----- |:-----|:-----|:-----|:-----| -| Llama-3.1-8B-Instruct | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | 
streaming | test_image_chat_completion_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | -| Llama-3.1-8B-Instruct | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_non_streaming | ✅ | -| Llama-3.2-11B-Vision-Instruct | /chat_completion | log_probs | test_completion_log_probs_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | streaming | test_text_completion_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | non_streaming | test_text_completion_non_streaming | ✅ | -| Llama-3.1-8B-Instruct | /completion | structured_output | test_text_completion_structured_output | ✅ | - -## Vector IO -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::faiss | /retrieve | | test_vector_db_retrieve | ✅ | - -## Agents -| Provider | API | Capability | Test | Status | -|:-----|:-----|:-----|:-----|:-----| -| inline::meta-reference | /create_agent_turn | rag | test_rag_agent | ✅ | -| inline::meta-reference | /create_agent_turn | custom_tool | test_custom_tool | ✅ | diff --git a/tests/integration/README.md b/tests/integration/README.md index 8c1ee6355..31d58c83f 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -28,7 +28,6 @@ if no model is specified. Experimental, under development, options: - `--record-responses`: record new API responses instead of using cached ones -- `--report`: path where the test report should be written, e.g. --report=/path/to/report.md ## Examples diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 131219e52..ec5918268 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -15,8 +15,6 @@ from dotenv import load_dotenv from llama_stack.log import get_logger -from .report import Report - logger = get_logger(__name__, category="tests") @@ -60,9 +58,6 @@ def pytest_configure(config): os.environ["DISABLE_CODE_SANDBOX"] = "1" logger.info("Setting DISABLE_CODE_SANDBOX=1 for macOS") - if config.getoption("--report"): - config.pluginmanager.register(Report(config)) - def pytest_addoption(parser): parser.addoption( diff --git a/tests/integration/metadata.py b/tests/integration/metadata.py deleted file mode 100644 index 55663c046..000000000 --- a/tests/integration/metadata.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from llama_stack.providers.datatypes import Api - -INFERENCE_API_CAPA_TEST_MAP = { - "chat_completion": { - "streaming": [ - "test_text_chat_completion_streaming", - "test_image_chat_completion_streaming", - ], - "non_streaming": [ - "test_image_chat_completion_non_streaming", - "test_text_chat_completion_non_streaming", - ], - "tool_calling": [ - "test_text_chat_completion_with_tool_calling_and_streaming", - "test_text_chat_completion_with_tool_calling_and_non_streaming", - ], - "log_probs": [ - "test_completion_log_probs_non_streaming", - "test_completion_log_probs_streaming", - ], - }, - "completion": { - "streaming": ["test_text_completion_streaming"], - "non_streaming": ["test_text_completion_non_streaming"], - "structured_output": ["test_text_completion_structured_output"], - }, -} - -VECTORIO_API_TEST_MAP = { - "retrieve": { - "": ["test_vector_db_retrieve"], - } -} - -AGENTS_API_TEST_MAP = { - "create_agent_turn": { - "rag": ["test_rag_agent"], - "custom_tool": ["test_custom_tool"], - "code_execution": ["test_code_interpreter_for_attachments"], - } -} - - -API_MAPS = { - Api.inference: INFERENCE_API_CAPA_TEST_MAP, - Api.vector_io: VECTORIO_API_TEST_MAP, - Api.agents: AGENTS_API_TEST_MAP, -} diff --git a/tests/integration/report.py b/tests/integration/report.py deleted file mode 100644 index 97543fa9d..000000000 --- a/tests/integration/report.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -from collections import defaultdict -from pathlib import Path - -import pytest -from pytest import CollectReport -from termcolor import cprint - -from llama_stack.models.llama.sku_list import ( - all_registered_models, - llama3_1_instruct_models, - llama3_2_instruct_models, - llama3_3_instruct_models, - llama3_instruct_models, - safety_models, -) -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.datatypes import Api - -from .metadata import API_MAPS - - -def featured_models(): - models = [ - *llama3_instruct_models(), - *llama3_1_instruct_models(), - *llama3_2_instruct_models(), - *llama3_3_instruct_models(), - *safety_models(), - ] - return {model.huggingface_repo: model for model in models if not model.variant} - - -SUPPORTED_MODELS = { - "ollama": { - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - }, - "tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, - "vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, -} - - -class Report: - def __init__(self, config): - self.distro_name = None - self.config = config - self.output_path = Path(config.getoption("--report")) if 
config.getoption("--report") else None - - stack_config = self.config.getoption("--stack-config") - if stack_config: - is_url = stack_config.startswith("http") or "//" in stack_config - is_yaml = stack_config.endswith(".yaml") - if not is_url and not is_yaml: - self.distro_name = stack_config - - self.report_data = defaultdict(dict) - # test function -> test nodeid - self.test_data = dict() - self.test_name_to_nodeid = defaultdict(list) - self.vision_model_id = None - self.text_model_id = None - self.client = None - - @pytest.hookimpl(tryfirst=True) - def pytest_runtest_logreport(self, report): - # This hook is called in several phases, including setup, call and teardown - # The test is considered failed / error if any of the outcomes is not "Passed" - outcome = self._process_outcome(report) - if report.nodeid not in self.test_data: - self.test_data[report.nodeid] = outcome - elif self.test_data[report.nodeid] != outcome and outcome != "Passed": - self.test_data[report.nodeid] = outcome - - def pytest_sessionfinish(self, session): - if not self.client: - return - - report = [] - report.append(f"# Report for {self.distro_name} distribution") - report.append("\n## Supported Models") - - header = f"| Model Descriptor | {self.distro_name} |" - dividor = "|:---|:---|" - - report.append(header) - report.append(dividor) - - rows = [] - if self.distro_name in SUPPORTED_MODELS: - for model in all_registered_models(): - if ("Instruct" not in model.core_model_id.value and "Guard" not in model.core_model_id.value) or ( - model.variant - ): - continue - row = f"| {model.core_model_id.value} |" - if model.core_model_id.value in SUPPORTED_MODELS[self.distro_name]: - row += " ✅ |" - else: - row += " ❌ |" - rows.append(row) - else: - supported_models = {m.identifier for m in self.client.models.list()} - for hf_name, model in featured_models().items(): - row = f"| {model.core_model_id.value} |" - if hf_name in supported_models: - row += " ✅ |" - else: - row += " ❌ |" - rows.append(row) - report.extend(rows) - - report.append("\n## Inference") - test_table = [ - "| Model | API | Capability | Test | Status |", - "|:----- |:-----|:-----|:-----|:-----|", - ] - for api, capa_map in API_MAPS[Api.inference].items(): - for capa, tests in capa_map.items(): - for test_name in tests: - model_id = self.text_model_id if "text" in test_name else self.vision_model_id - test_nodeids = self.test_name_to_nodeid[test_name] - if not test_nodeids: - continue - - # There might be more than one parametrizations for the same test function. We take - # the result of the first one for now. Ideally we should mark the test as failed if - # any of the parametrizations failed. 
- test_table.append( - f"| {model_id} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |" - ) - - report.extend(test_table) - - name_map = {Api.vector_io: "Vector IO", Api.agents: "Agents"} - providers = self.client.providers.list() - for api_group in [Api.vector_io, Api.agents]: - api_capitalized = name_map[api_group] - report.append(f"\n## {api_capitalized}") - test_table = [ - "| Provider | API | Capability | Test | Status |", - "|:-----|:-----|:-----|:-----|:-----|", - ] - provider = [p for p in providers if p.api == str(api_group.name)] - provider_str = ",".join(str(p) for p in provider) if provider else "" - for api, capa_map in API_MAPS[api_group].items(): - for capa, tests in capa_map.items(): - for test_name in tests: - test_nodeids = self.test_name_to_nodeid[test_name] - if not test_nodeids: - continue - test_table.append( - f"| {provider_str} | /{api} | {capa} | {test_name} | {self._print_result_icon(self.test_data[test_nodeids[0]])} |" - ) - report.extend(test_table) - - output_file = self.output_path - text = "\n".join(report) + "\n" - output_file.write_text(text) - cprint(f"\nReport generated: {output_file.absolute()}", "green") - - def pytest_runtest_makereport(self, item, call): - func_name = getattr(item, "originalname", item.name) - self.test_name_to_nodeid[func_name].append(item.nodeid) - - # Get values from fixtures for report output - if model_id := item.funcargs.get("text_model_id"): - parts = model_id.split("/") - text_model = parts[1] if len(parts) > 1 else model_id - self.text_model_id = self.text_model_id or text_model - elif model_id := item.funcargs.get("vision_model_id"): - parts = model_id.split("/") - vision_model = parts[1] if len(parts) > 1 else model_id - self.vision_model_id = self.vision_model_id or vision_model - - if not self.client: - self.client = item.funcargs.get("llama_stack_client") - - def _print_result_icon(self, result): - if result == "Passed": - return "✅" - elif result == "Failed" or result == "Error": - return "❌" - else: - # result == "Skipped": - return "⏭️" - - def _process_outcome(self, report: CollectReport): - if self._is_error(report): - return "Error" - if hasattr(report, "wasxfail"): - if report.outcome in ["passed", "failed"]: - return "XPassed" - if report.outcome == "skipped": - return "XFailed" - return report.outcome.capitalize() - - def _is_error(self, report: CollectReport): - return report.when in ["setup", "teardown", "collect"] and report.outcome == "failed" From dd07c7a5b51ba1dcf4c36a3e9100dd4c09278e78 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Wed, 14 May 2025 06:41:51 +0100 Subject: [PATCH 2/5] fix: Make search tool talk about models (#2151) Prevent it from returning results about 'LT Wright Maverick Scout' knives. Ultimately we want the word "model" in the returned results; putting 'llm' in the search term makes this more likely.
Closes: #2150 Signed-off-by: Derek Higgins --- .../verifications/openai_api/fixtures/test_cases/responses.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml index ed5f571e8..262d82526 100644 --- a/tests/verifications/openai_api/fixtures/test_cases/responses.yaml +++ b/tests/verifications/openai_api/fixtures/test_cases/responses.yaml @@ -79,7 +79,7 @@ test_response_multi_turn_image: - type: input_image image_url: "https://upload.wikimedia.org/wikipedia/commons/f/f7/Llamas%2C_Vernagt-Stausee%2C_Italy.jpg" output: "llama" - - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick' and 'scout'." + - input: "Search the web using the search tool for the animal from the previous response. Your search query should be a single phrase that includes the animal's name and the words 'maverick', 'scout' and 'llm'" tools: - type: web_search output: "model" From 1de0dfaab58ffb1d86d13083ec5d92ee45431c8e Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 14 May 2025 03:37:07 -0400 Subject: [PATCH 3/5] docs: Clarify kfp provider is both inline and remote (#2144) The provider selling point *is* using the same provider for both. Signed-off-by: Ihar Hrachyshka Signed-off-by: Ihar Hrachyshka --- docs/source/providers/external.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/providers/external.md b/docs/source/providers/external.md index ee36ebc3c..6c36901ee 100644 --- a/docs/source/providers/external.md +++ b/docs/source/providers/external.md @@ -53,7 +53,7 @@ Here's a list of known external providers that you can use with Llama Stack: | Name | Description | API | Type | Repository | |------|-------------|-----|------|------------| | KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) | -| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) | +| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) | | RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) | | TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) | From 43d4447ff0b8d466871cf7d5a5f00898ea56fec4 Mon Sep 17 00:00:00 2001 From: Ilya Kolchinsky <58424190+ilya-kolchinsky@users.noreply.github.com> Date: Wed, 14 May 2025 11:38:00 +0200 Subject: [PATCH 4/5] fix: remote vLLM tool execution now works when the last chunk contains the call arguments (#2112) # What does this PR do? Closes #2111. Fixes an error causing Llama Stack to just return `` and complete the turn without actually executing the tool. See the issue description for more detail. 
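To make the edge case concrete, here is a minimal, self-contained sketch of the ordering this fix enforces. It is not the provider's actual code: plain dicts stand in for the OpenAI chunk objects, and `accumulate_stream`, the tool name, and the arguments below are made up for illustration. It shows why tool-call deltas must be folded into the buffer before `finish_reason` is acted on, so that arguments arriving only in the final chunk are still captured.

```python
import json


def accumulate_stream(chunks):
    """Fold OpenAI-style streaming deltas into tool calls (illustrative sketch only)."""
    tool_name, args_buf, emitted = "", "", []
    for chunk in chunks:
        choice = chunk["choices"][0]
        delta = choice.get("delta", {})

        # 1) Always merge tool-call fragments into the buffer first ...
        for tc in delta.get("tool_calls") or []:
            fn = tc.get("function", {})
            tool_name += fn.get("name") or ""
            args_buf += fn.get("arguments") or ""

        # 2) ... and only then act on finish_reason, so arguments that share
        #    the final chunk are not lost.
        if choice.get("finish_reason") == "tool_calls":
            emitted.append({"tool_name": tool_name, "arguments": json.loads(args_buf)})
    return emitted


# The failing shape from the issue: the name arrives in the first chunk, the
# arguments only in the last chunk, together with finish_reason.
chunks = [
    {"choices": [{"delta": {"tool_calls": [{"function": {"name": "get_weather", "arguments": None}}]},
                  "finish_reason": None}]},
    {"choices": [{"delta": {"tool_calls": [{"function": {"name": None, "arguments": '{"city": "Dublin"}'}}]},
                  "finish_reason": "tool_calls"}]},
]
assert accumulate_stream(chunks) == [{"tool_name": "get_weather", "arguments": {"city": "Dublin"}}]
```

With the previous ordering (checking `finish_reason` before merging the delta), the second chunk would trigger completion while the argument buffer was still empty, which is exactly the empty-tool-call behaviour described above.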
## Test Plan 1) Ran existing unit tests 2) Added a dedicated test verifying correct behavior in this edge case 3) Ran the code snapshot from #2111 --- .../providers/remote/inference/vllm/vllm.py | 14 ++-- .../providers/inference/test_remote_vllm.py | 80 +++++++++++++++++++ 2 files changed, 87 insertions(+), 7 deletions(-) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 8bc733fd3..3fb28ee08 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -168,6 +168,12 @@ async def _process_vllm_chat_completion_stream_response( log.warning("vLLM failed to generation any completions - check the vLLM server logs for an error.") continue choice = chunk.choices[0] + if choice.delta.tool_calls: + tool_call = convert_tool_call(choice.delta.tool_calls[0]) + tool_call_buf.tool_name += str(tool_call.tool_name) + tool_call_buf.call_id += tool_call.call_id + # TODO: remove str() when dict type for 'arguments' is no longer allowed + tool_call_buf.arguments += str(tool_call.arguments) if choice.finish_reason: args_str = tool_call_buf.arguments args = None @@ -208,13 +214,7 @@ async def _process_vllm_chat_completion_stream_response( stop_reason=_convert_to_vllm_finish_reason(choice.finish_reason), ) ) - elif choice.delta.tool_calls: - tool_call = convert_tool_call(choice.delta.tool_calls[0]) - tool_call_buf.tool_name += str(tool_call.tool_name) - tool_call_buf.call_id += tool_call.call_id - # TODO: remove str() when dict type for 'arguments' is no longer allowed - tool_call_buf.arguments += str(tool_call.arguments) - else: + elif not choice.delta.tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=event_type, diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index a2e3b64c2..a8c4e07a0 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -28,6 +28,7 @@ from openai.types.model import Model as OpenAIModel from llama_stack.apis.inference import ( ChatCompletionRequest, + ChatCompletionResponseEventType, CompletionMessage, SystemMessage, ToolChoice, @@ -294,3 +295,82 @@ async def test_get_params_empty_tools(vllm_inference_adapter): ) params = await vllm_inference_adapter._get_params(request) assert "tools" not in params + + +@pytest.mark.asyncio +async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk(): + """ + Tests the edge case where the model returns the arguments for the tool call in the same chunk that + contains the finish reason (i.e., the last one). + We want to make sure the tool call is executed in this case, and the parameters are passed correctly. 
+ """ + + mock_tool_name = "mock_tool" + mock_tool_arguments = {"arg1": 0, "arg2": 100} + mock_tool_arguments_str = json.dumps(mock_tool_arguments) + + async def mock_stream(): + mock_chunks = [ + OpenAIChatCompletionChunk( + id="chunk-1", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + { + "delta": { + "content": None, + "tool_calls": [ + { + "index": 0, + "id": "mock_id", + "type": "function", + "function": { + "name": mock_tool_name, + "arguments": None, + }, + } + ], + }, + "finish_reason": None, + "logprobs": None, + "index": 0, + } + ], + ), + OpenAIChatCompletionChunk( + id="chunk-1", + created=1, + model="foo", + object="chat.completion.chunk", + choices=[ + { + "delta": { + "content": None, + "tool_calls": [ + { + "index": 0, + "id": None, + "function": { + "name": None, + "arguments": mock_tool_arguments_str, + }, + } + ], + }, + "finish_reason": "tool_calls", + "logprobs": None, + "index": 0, + } + ], + ), + ] + for chunk in mock_chunks: + yield chunk + + chunks = [chunk async for chunk in _process_vllm_chat_completion_stream_response(mock_stream())] + assert len(chunks) == 2 + assert chunks[-1].event.event_type == ChatCompletionResponseEventType.complete + assert chunks[-2].event.delta.type == "tool_call" + assert chunks[-2].event.delta.tool_call.tool_name == mock_tool_name + assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments From a1fbfb51e203919f87fc58e9127dc5c9260e92a6 Mon Sep 17 00:00:00 2001 From: Nathan Weinberg <31703736+nathan-weinberg@users.noreply.github.com> Date: Wed, 14 May 2025 08:59:58 -0400 Subject: [PATCH 5/5] ci(chore): use hashes for all version pinning (#2157) # What does this PR do? Most third-party actions already use hashes for pinning, but not all did; this does proper hash pinning on all remaining actions that were still using tags. Signed-off-by: Nathan Weinberg --- .github/workflows/integration-auth-tests.yml | 5 +++-- .github/workflows/integration-tests.yml | 2 +- .github/workflows/test-external-providers.yml | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 19a4ae003..54db40cd9 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -28,12 +28,13 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0 with: python-version: "3.10" + activate-environment: true - name: Set Up Environment and Install Dependencies run: | diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index f82a7cdd2..d755ff0ae 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -106,7 +106,7 @@ jobs: - name: Upload all logs to artifacts if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }} path: | diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml index b2329c420..77e280349 100644 --- a/.github/workflows/test-external-providers.yml +++ b/.github/workflows/test-external-providers.yml @@ -23,10 +23,10 @@ jobs: # container and point 'uv pip install' to the
correct path... steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0 with: python-version: "3.10"