mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-23 08:33:09 +00:00 
			
		
		
		
	
		
			Some checks failed
		
		
	
	Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
				
			SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
				
			SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
				
			Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
				
			Python Package Build Test / build (3.13) (push) Failing after 2s
				
			Test Llama Stack Build / generate-matrix (push) Successful in 6s
				
			Unit Tests / unit-tests (3.12) (push) Failing after 5s
				
			Test Llama Stack Build / build-single-provider (push) Failing after 9s
				
			Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 10s
				
			Vector IO Integration Tests / test-matrix (push) Failing after 14s
				
			Unit Tests / unit-tests (3.13) (push) Failing after 7s
				
			Test External API and Providers / test-external (venv) (push) Failing after 12s
				
			API Conformance Tests / check-schema-compatibility (push) Successful in 19s
				
			Test Llama Stack Build / build (push) Failing after 7s
				
			Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 26s
				
			Test Llama Stack Build / build-custom-container-distribution (push) Failing after 25s
				
			Python Package Build Test / build (3.12) (push) Failing after 33s
				
			UI Tests / ui-tests (22) (push) Successful in 1m26s
				
			Pre-commit / pre-commit (push) Successful in 2m18s
				
			Also a critical bug fix so test recordings can be found inside docker
		
			
				
	
	
		
			319 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			319 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| import inspect
 | |
| import itertools
 | |
| import os
 | |
| import tempfile
 | |
| import textwrap
 | |
| import time
 | |
| from pathlib import Path
 | |
| 
 | |
| import pytest
 | |
| from dotenv import load_dotenv
 | |
| 
 | |
| from llama_stack.log import get_logger
 | |
| from llama_stack.testing.api_recorder import patch_httpx_for_test_id
 | |
| 
 | |
| from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
 | |
| 
 | |
| logger = get_logger(__name__, category="tests")
 | |
| 
 | |
| 
 | |
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Record the call-phase result of a test on its item.

    Runs as a hook wrapper around report creation. Once the report exists and
    belongs to the "call" phase, its outcome and xfail marker are copied onto
    ``item`` as ``execution_outcome`` and ``was_xfail``; ``pytest_runtest_teardown``
    reads those attributes back.
    """
    hook_result = yield
    test_report = hook_result.get_result()

    # Setup and teardown reports are not of interest here.
    if test_report.when != "call":
        return

    item.execution_outcome = test_report.outcome
    item.was_xfail = getattr(test_report, "wasxfail", False)
 | |
| 
 | |
| 
 | |
def pytest_sessionstart(session):
    """Prepare process-wide environment variables before the session runs.

    Sets ``KMP_DUPLICATE_LIB_OK``, defaults ``LLAMA_STACK_TEST_INFERENCE_MODE`` to
    "replay" and ``SQLITE_STORE_DIR`` to a fresh temporary directory when they are
    not already set, derives ``LLAMA_STACK_TEST_STACK_CONFIG_TYPE`` from the
    ``--stack-config`` option, and finally calls ``patch_httpx_for_test_id()``.
    """
    env = os.environ

    # stop macOS from complaining about duplicate OpenMP libraries
    env["KMP_DUPLICATE_LIB_OK"] = "TRUE"
    env.setdefault("LLAMA_STACK_TEST_INFERENCE_MODE", "replay")

    # Only create a temporary directory when the caller has not chosen one.
    if "SQLITE_STORE_DIR" not in env:
        env["SQLITE_STORE_DIR"] = tempfile.mkdtemp()

    # Set test stack config type for api_recorder test isolation
    stack_config = session.config.getoption("--stack-config", default=None)
    server_prefixes = ("server:", "docker:", "http")
    uses_server = bool(stack_config) and stack_config.startswith(server_prefixes)
    config_type = "server" if uses_server else "library_client"

    env["LLAMA_STACK_TEST_STACK_CONFIG_TYPE"] = config_type
    logger.info(f"Test stack config type: {config_type} (stack_config={stack_config})")

    patch_httpx_for_test_id()
 | |
| 
 | |
| 
 | |
@pytest.fixture(autouse=True)
def _track_test_context(request):
    """Expose the running test's nodeid to the recording machinery.

    Being autouse, this fixture wraps every test: before the test body it passes
    ``request.node.nodeid`` to ``set_test_context`` and, once the test is done,
    hands the returned token to ``reset_test_context``.
    """
    from llama_stack.core.testing_context import reset_test_context, set_test_context

    context_token = set_test_context(request.node.nodeid)
    yield
    reset_test_context(context_token)
 | |
| 
 | |
| 
 | |
def pytest_runtest_teardown(item):
    """Optionally pause after an inference, safety or agents test.

    Only tests whose nodeid contains "inference/", "safety/" or "agents/" are
    considered. If such a test actually ran (outcome "passed" or "failed") and was
    not an expected failure, sleep for ``LLAMA_STACK_TEST_INTERVAL_SECONDS`` seconds
    when that environment variable is set to a non-empty value.
    """
    throttled_fragments = ("inference/", "safety/", "agents/")
    if all(fragment not in item.nodeid for fragment in throttled_fragments):
        return

    # Attributes are set by pytest_runtest_makereport; absent when the call phase never ran.
    outcome = getattr(item, "execution_outcome", None)
    was_xfail = getattr(item, "was_xfail", False)
    logger.debug(f"Test '{item.nodeid}' outcome was '{outcome}' (xfail={was_xfail})")

    # Skipped tests and expected failures never trigger the pause.
    if was_xfail or outcome not in ("passed", "failed"):
        return

    interval_seconds = os.getenv("LLAMA_STACK_TEST_INTERVAL_SECONDS")
    if interval_seconds:
        time.sleep(float(interval_seconds))
 | |
| 
 | |
| 
 | |
def pytest_configure(config):
    """Apply CLI options, dotenv values and suite/setup defaults to the run.

    In order, this hook:
      1. sets ``tbstyle`` to "short" and ``disable_warnings`` to True;
      2. calls ``load_dotenv()`` and then exports every ``--env KEY=value`` entry;
      3. exports ``--inference-mode`` as ``LLAMA_STACK_TEST_INFERENCE_MODE``;
      4. validates ``--suite`` and resolves the setup (explicit ``--setup``, else the
         suite's ``default_setup``);
      5. applies the setup's environment variables (existing variables win) and its
         option defaults (only for options that are still ``None``);
      6. falls back to ``embedding_dimension = 384`` if that option is still ``None``.

    Raises:
        pytest.UsageError: if an ``--env`` entry is not of the form ``KEY=value``,
            or if the suite or setup name is unknown.
    """
    config.option.tbstyle = "short"
    config.option.disable_warnings = True

    load_dotenv()

    for env_var in config.getoption("--env") or []:
        # partition() never raises, so malformed entries can be reported as a usage
        # error instead of surfacing as an unpacking traceback.
        key, separator, value = env_var.partition("=")
        if not separator or not key:
            raise pytest.UsageError(f"Invalid --env value '{env_var}'. Expected format: KEY=value")
        os.environ[key] = value

    inference_mode = config.getoption("--inference-mode")
    os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = inference_mode

    suite = config.getoption("--suite")
    if suite and suite not in SUITE_DEFINITIONS:
        raise pytest.UsageError(f"Unknown suite: {suite}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}")

    # Apply setups (global parameterizations): env + defaults
    setup = config.getoption("--setup")
    if suite and not setup:
        setup = SUITE_DEFINITIONS[suite].default_setup

    if setup:
        if setup not in SETUP_DEFINITIONS:
            raise pytest.UsageError(
                f"Unknown setup '{setup}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}"
            )

        setup_obj = SETUP_DEFINITIONS[setup]
        logger.info(f"Applying setup '{setup}'{' for suite ' + suite if suite else ''}")
        # Apply env first; variables already present in the environment are kept.
        for k, v in setup_obj.env.items():
            if k not in os.environ:
                os.environ[k] = str(v)
        # Apply defaults if not provided explicitly
        for dest, value in setup_obj.defaults.items():
            current = getattr(config.option, dest, None)
            if current is None:
                setattr(config.option, dest, value)

    # Apply global fallback for embedding_dimension if still not set
    # NOTE(review): --embedding-dimension is registered with default=768, so this
    # option is never None here and neither this fallback nor a setup-provided
    # embedding_dimension default appears reachable -- confirm the intended default.
    if getattr(config.option, "embedding_dimension", None) is None:
        config.option.embedding_dimension = 384
 | |
| 
 | |
| 
 | |
def pytest_addoption(parser):
    """Register the command-line options used by the integration tests.

    Options cover the stack pointer (``--stack-config``), ad-hoc environment
    variables (``--env``), the model/shield selectors that feed the parametrized
    fixtures, the inference mode, the report path, and the ``--suite`` / ``--setup``
    selectors whose help text lists the available definitions.
    """
    stack_config_help = textwrap.dedent(
        """
        a 'pointer' to the stack. this can be either be:
        (a) a template name like `starter`, or
        (b) a path to a run.yaml file, or
        (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`, or
        (d) a server config like `server:ci-tests`, or
        (e) a docker config like `docker:ci-tests` (builds and runs container)
        """
    )
    parser.addoption("--stack-config", help=stack_config_help)
    parser.addoption("--env", action="append", help="Set environment variables, e.g. --env KEY=value")

    # Options that accept a comma-separated list: (flag, plural noun, fixture name).
    list_options = (
        ("--text-model", "text models", "text_model_id"),
        ("--vision-model", "vision models", "vision_model_id"),
        ("--embedding-model", "embedding models", "embedding_model_id"),
        ("--safety-shield", "safety shields", "shield_id"),
    )
    for flag, noun, fixture_name in list_options:
        parser.addoption(flag, help=f"comma-separated list of {noun}. Fixture name: {fixture_name}")

    parser.addoption("--judge-model", help="Specify the judge model to use for testing")
    # NOTE(review): pytest_configure only applies setup defaults and its 384 fallback
    # when this option is None; with default=768 that never happens -- confirm which
    # default is intended.
    parser.addoption(
        "--embedding-dimension",
        type=int,
        default=768,
        help="Output dimensionality of the embedding model to use for testing. Default: 768",
    )

    parser.addoption(
        "--inference-mode",
        choices=["record", "replay", "live", "record-if-missing"],
        default="replay",
        help="Inference mode: { record, replay, live, record-if-missing } (default: replay)",
    )
    parser.addoption(
        "--report",
        help="Path where the test report should be written, e.g. --report=/path/to/report.md",
    )

    suite_names = ", ".join(sorted(SUITE_DEFINITIONS))
    parser.addoption(
        "--suite",
        help=f"Single test suite to run (narrows collection). Available: {suite_names}. Example: --suite=responses",
    )

    # Global setups for any suite
    setup_names = ", ".join(sorted(SETUP_DEFINITIONS))
    parser.addoption(
        "--setup",
        help=(
            f"Global test setup configuration. Available: {setup_names}. "
            "Can be used with any suite. Example: --setup=ollama"
        ),
    )
 | |
| 
 | |
| 
 | |
# Abbreviations used in generated test IDs, keyed by full model identifier.
MODEL_SHORT_IDS = {
    "meta-llama/Llama-3.2-3B-Instruct": "3B",
    "meta-llama/Llama-3.1-8B-Instruct": "8B",
    "meta-llama/Llama-3.1-70B-Instruct": "70B",
    "meta-llama/Llama-3.1-405B-Instruct": "405B",
    "meta-llama/Llama-3.2-11B-Vision-Instruct": "11B",
    "meta-llama/Llama-3.2-90B-Vision-Instruct": "90B",
    "meta-llama/Llama-3.3-70B-Instruct": "70B",
    "meta-llama/Llama-Guard-3-1B": "Guard1B",
    "meta-llama/Llama-Guard-3-8B": "Guard8B",
    "nomic-ai/nomic-embed-text-v1.5": "Nomic-v1.5",
}


def get_short_id(value):
    """Return the abbreviation registered for ``value``, or ``value`` itself if none exists."""
    try:
        return MODEL_SHORT_IDS[value]
    except KeyError:
        return value
 | |
| 
 | |
| 
 | |
def pytest_generate_tests(metafunc):
    """
    This is the main function which processes CLI arguments and generates various combinations of parameters.
    It is also responsible for generating test IDs which are succinct enough.

    Each option can be comma separated list of values which results in multiple parameter combinations.
    Empty entries (for example from a trailing comma) are dropped; an option that yields no usable
    entry is treated as not provided and parametrizes its fixture with a single ``None``.

    Test IDs are built only from fixtures that appear in the test function's own signature and have
    values, so the ID list always has exactly one entry per parameter combination.
    """
    # Map of fixture name to its CLI option and ID prefix
    fixture_configs = {
        "text_model_id": ("--text-model", "txt"),
        "vision_model_id": ("--vision-model", "vis"),
        "embedding_model_id": ("--embedding-model", "emb"),
        "shield_id": ("--safety-shield", "shield"),
        "judge_model_id": ("--judge-model", "judge"),
        "embedding_dimension": ("--embedding-dimension", "dim"),
    }

    # Collect all parameters and their values
    params = []
    param_values = {}
    for fixture_name, (option, _id_prefix) in fixture_configs.items():
        if fixture_name not in metafunc.fixturenames:
            continue

        params.append(fixture_name)
        # Use getattr on config.option to see values set by pytest_configure fallbacks
        dest = option.lstrip("-").replace("-", "_")
        val = getattr(metafunc.config.option, dest, None)

        # str() also covers non-string options such as the integer embedding dimension.
        entries = [entry.strip() for entry in str(val).split(",")] if val else []
        # Blank entries would otherwise become "" parameter values with no matching ID.
        param_values[fixture_name] = [entry for entry in entries if entry] or [None]

    if not params:
        return

    # Generate all combinations of parameter values
    value_combinations = list(itertools.product(*(param_values[p] for p in params)))

    # Get actual function parameters using inspect
    test_func_params = set(inspect.signature(metafunc.function).parameters)

    # Fixtures that contribute to the ID: requested directly by the test and actually populated.
    id_params = {p for p in params if p in test_func_params and param_values[p][0] is not None}

    test_ids = None
    if id_params:
        test_ids = [
            ":".join(
                f"{fixture_configs[name][1]}={get_short_id(value)}"
                for name, value in zip(params, combo, strict=True)
                if name in id_params
            )
            for combo in value_combinations
        ]

    metafunc.parametrize(params, value_combinations, scope="session", ids=test_ids)
 | |
| 
 | |
| 
 | |
# Modules listed here are loaded by pytest as plugins.
pytest_plugins = [
    "tests.integration.fixtures.common",
]
 | |
| 
 | |
| 
 | |
| def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
 | |
|     """Skip collecting paths outside the selected suite roots for speed."""
 | |
|     suite = config.getoption("--suite")
 | |
|     if not suite:
 | |
|         return False
 | |
| 
 | |
|     sobj = SUITE_DEFINITIONS.get(suite)
 | |
|     roots: list[str] = sobj.get("roots", []) if isinstance(sobj, dict) else getattr(sobj, "roots", [])
 | |
|     if not roots:
 | |
|         return False
 | |
| 
 | |
|     p = Path(str(path)).resolve()
 | |
| 
 | |
|     # Only constrain within tests/integration to avoid ignoring unrelated tests
 | |
|     integration_root = (Path(str(config.rootpath)) / "tests" / "integration").resolve()
 | |
|     if not p.is_relative_to(integration_root):
 | |
|         return False
 | |
| 
 | |
|     for r in roots:
 | |
|         rp = (Path(str(config.rootpath)) / r).resolve()
 | |
|         if rp.is_file():
 | |
|             # Allow the exact file and any ancestor directories so pytest can walk into it.
 | |
|             if p == rp:
 | |
|                 return False
 | |
|             if p.is_dir() and rp.is_relative_to(p):
 | |
|                 return False
 | |
|         else:
 | |
|             # Allow anything inside an allowed directory
 | |
|             if p.is_relative_to(rp):
 | |
|                 return False
 | |
|     return True
 |