mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
You now run the integration tests with these options: ```bash Custom options: --stack-config=STACK_CONFIG a 'pointer' to the stack. this can be either be: (a) a template name like `fireworks`, or (b) a path to a run.yaml file, or (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference` --env=ENV Set environment variables, e.g. --env KEY=value --text-model=TEXT_MODEL comma-separated list of text models. Fixture name: text_model_id --vision-model=VISION_MODEL comma-separated list of vision models. Fixture name: vision_model_id --embedding-model=EMBEDDING_MODEL comma-separated list of embedding models. Fixture name: embedding_model_id --safety-shield=SAFETY_SHIELD comma-separated list of safety shields. Fixture name: shield_id --judge-model=JUDGE_MODEL comma-separated list of judge models. Fixture name: judge_model_id --embedding-dimension=EMBEDDING_DIMENSION Output dimensionality of the embedding model to use for testing. Default: 384 --record-responses Record new API responses instead of using cached ones. --report=REPORT Path where the test report should be written, e.g. --report=/path/to/report.md ``` Importantly, if you don't specify any of the models (text-model, vision-model, etc.) the relevant tests will get **skipped!** This will make running tests somewhat more annoying since all options will need to be specified. We will make this easier by adding some easy wrapper yaml configs. ## Test Plan Example: ```bash ashwin@ashwin-mbp ~/local/llama-stack/tests/integration (unify_tests) $ LLAMA_STACK_CONFIG=fireworks pytest -s -v inference/test_text_inference.py \ --text-model meta-llama/Llama-3.2-3B-Instruct ```
160 lines
5.4 KiB
Python
160 lines
5.4 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
import inspect
|
|
import itertools
|
|
import os
|
|
import textwrap
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
from .report import Report
|
|
|
|
|
|
def pytest_configure(config):
    """Apply session-wide pytest settings and environment overrides.

    Steps, in order:
      1. Force short tracebacks and disable warning output.
      2. Call ``load_dotenv()`` (presumably picks up a local ``.env`` file).
      3. Copy every ``--env KEY=value`` option into ``os.environ``. This runs
         after ``load_dotenv()``, so command-line values overwrite anything
         already present under the same key.
      4. Register the ``Report`` plugin when ``--report`` was given.

    Raises:
        ValueError: if an ``--env`` entry is not of the form ``KEY=value``.
    """
    config.option.tbstyle = "short"
    config.option.disable_warnings = True

    load_dotenv()

    for env_var in config.getoption("--env") or []:
        # Split on the first "=" only, so values may themselves contain "=".
        key, separator, value = env_var.partition("=")
        if not separator or not key:
            # Fail with a message that names the bad entry instead of an
            # opaque tuple-unpacking / OS-level error.
            raise ValueError(f"Invalid --env value {env_var!r}: expected KEY=value")
        os.environ[key] = value

    if config.getoption("--report"):
        config.pluginmanager.register(Report(config))
|
|
|
|
|
|
def pytest_addoption(parser):
    """Register the custom command-line options used by the integration tests.

    Every option is added through ``parser.addoption``. The model and shield
    options take comma-separated lists; each help string names the fixture
    that the option feeds.
    """
    stack_config_help = textwrap.dedent(
        """
        a 'pointer' to the stack. this can be either be:
        (a) a template name like `fireworks`, or
        (b) a path to a run.yaml file, or
        (c) an adhoc config spec, e.g. `inference=fireworks,safety=llama-guard,agents=meta-reference`
        """
    )

    # (flag, keyword arguments) pairs, registered in exactly this order.
    option_specs = (
        ("--stack-config", {"help": stack_config_help}),
        ("--env", {"action": "append", "help": "Set environment variables, e.g. --env KEY=value"}),
        ("--text-model", {"help": "comma-separated list of text models. Fixture name: text_model_id"}),
        ("--vision-model", {"help": "comma-separated list of vision models. Fixture name: vision_model_id"}),
        (
            "--embedding-model",
            {"help": "comma-separated list of embedding models. Fixture name: embedding_model_id"},
        ),
        ("--safety-shield", {"help": "comma-separated list of safety shields. Fixture name: shield_id"}),
        ("--judge-model", {"help": "comma-separated list of judge models. Fixture name: judge_model_id"}),
        # NOTE(review): the help text advertises a default of 384, but no
        # ``default=`` is passed here -- confirm where that default is applied.
        (
            "--embedding-dimension",
            {
                "type": int,
                "help": "Output dimensionality of the embedding model to use for testing. Default: 384",
            },
        ),
        (
            "--record-responses",
            {"action": "store_true", "help": "Record new API responses instead of using cached ones."},
        ),
        (
            "--report",
            {"help": "Path where the test report should be written, e.g. --report=/path/to/report.md"},
        ),
    )

    for flag, settings in option_specs:
        parser.addoption(flag, **settings)
|
|
|
|
|
|
# Full model / shield identifier -> abbreviated label used when building test IDs.
# Identifiers missing from this table are used verbatim by get_short_id().
# Note that both 70B instruct entries (3.1 and 3.3) share the label "70B".
MODEL_SHORT_IDS = {
    # Text instruct models
    "meta-llama/Llama-3.2-3B-Instruct": "3B",
    "meta-llama/Llama-3.1-8B-Instruct": "8B",
    "meta-llama/Llama-3.1-70B-Instruct": "70B",
    "meta-llama/Llama-3.1-405B-Instruct": "405B",
    # Vision instruct models
    "meta-llama/Llama-3.2-11B-Vision-Instruct": "11B",
    "meta-llama/Llama-3.2-90B-Vision-Instruct": "90B",
    "meta-llama/Llama-3.3-70B-Instruct": "70B",
    # Llama Guard models
    "meta-llama/Llama-Guard-3-1B": "Guard1B",
    "meta-llama/Llama-Guard-3-8B": "Guard8B",
    # Embedding model
    "all-MiniLM-L6-v2": "MiniLM",
}
|
|
|
|
|
|
def get_short_id(value):
    """Return the abbreviated label for ``value``, or ``value`` itself if it has none.

    The label is looked up in ``MODEL_SHORT_IDS``; identifiers that are not
    listed there are returned unchanged.
    """
    try:
        return MODEL_SHORT_IDS[value]
    except KeyError:
        return value
|
|
|
|
|
|
def pytest_generate_tests(metafunc):
    """
    Parametrize a test from the model/shield CLI options and give it a succinct test ID.

    For every known fixture the test requests (``metafunc.fixturenames``), the
    matching CLI option is read and split on commas. The test is then
    parametrized (session scope) over the cartesian product of all those value
    lists. A fixture whose option was not supplied is parametrized with a
    single ``None`` value.

    Test IDs have the form ``<prefix>=<short id>`` joined with ``:``. An ID
    only mentions fixtures that appear directly in the test function's
    signature and actually have a value.

    Empty entries in a comma-separated list (e.g. a trailing comma) are
    ignored. Otherwise they would become empty-string parameter values that
    contribute no ID, leaving the ``ids`` list shorter than the parameter sets.
    """
    # Map of fixture name to its CLI option and ID prefix
    fixture_configs = {
        "text_model_id": ("--text-model", "txt"),
        "vision_model_id": ("--vision-model", "vis"),
        "embedding_model_id": ("--embedding-model", "emb"),
        "shield_id": ("--safety-shield", "shield"),
        "judge_model_id": ("--judge-model", "judge"),
        "embedding_dimension": ("--embedding-dimension", "dim"),
    }

    # Collect the candidate values for each fixture this test requests.
    param_values = {}
    for fixture_name, (option, _id_prefix) in fixture_configs.items():
        if fixture_name not in metafunc.fixturenames:
            continue

        val = metafunc.config.getoption(option)
        # str() because an option value may not be a string (e.g. an int).
        # NOTE(review): values are therefore always handed on as strings --
        # confirm the consuming fixtures expect that for embedding_dimension.
        pieces = (piece.strip() for piece in str(val).split(",")) if val else ()
        values = [piece for piece in pieces if piece]
        param_values[fixture_name] = values or [None]

    if not param_values:
        return

    params = list(param_values)

    # Generate all combinations of parameter values
    value_combinations = list(itertools.product(*param_values.values()))

    # Only fixtures named directly in the test's signature show up in the ID.
    test_func_params = set(inspect.signature(metafunc.function).parameters)

    # Each fixture's values are either all None or all non-empty, so either
    # every combination yields an ID or none does; the list stays aligned
    # with value_combinations.
    test_ids = []
    for combo in value_combinations:
        parts = [
            f"{fixture_configs[param_name][1]}={get_short_id(val)}"
            for param_name, val in zip(params, combo, strict=True)
            if val and param_name in test_func_params
        ]
        if parts:
            test_ids.append(":".join(parts))

    metafunc.parametrize(params, value_combinations, scope="session", ids=test_ids or None)
|
|
|
|
|
|
pytest_plugins = ["tests.integration.fixtures.common"]
|