feat(tests): migrate to global "setups" system for test configuration

This PR refactors the integration test system from suite-specific "contexts" to global "setups".
This provides better separation of concerns: **suites = what to test, setups = how to configure.**

Key changes:
- New `tests/integration/setups.py` with global, reusable configurations (ollama, vllm, gpt, claude)
- Simplified `tests/integration/suites.py` to only define test collection roots + default setup references
- Updated CLI from `--context` to `--setup` parameter that works with any suite
- Modified `scripts/integration-tests.sh` to use `--test-setup` instead of `--test-context`
- Updated documentation to reflect the new global setup system

Benefits:
- Setups can be reused across multiple suites (e.g., use "gpt" with any suite)
- Clear separation between test selection (suites) and configuration (setups)
- Easier to add new configurations without modifying existing suites
- Centralized configuration management

Usage examples:
- `pytest tests/integration --suite=responses --setup=gpt`
- `pytest tests/integration --suite=vision --setup=ollama`
- `pytest tests/integration --suite=base --setup=vllm`
This commit is contained in:
Ashwin Bharambe 2025-09-08 14:56:08 -07:00
parent 47b640370e
commit c662d8aa31
10 changed files with 272 additions and 178 deletions

View file

@ -8,46 +8,111 @@
# For example:
#
# ```bash
# pytest tests/integration/ --suite=vision
# pytest tests/integration/ --suite=vision --setup=ollama-vision
# ```
#
# Each suite can:
# - restrict collection to specific roots (dirs or files)
# - provide default CLI option values (e.g. text_model, embedding_model, etc.)
"""
Each suite defines what to run (its collection roots). Suites can be run with the global setups
defined in SETUP_DEFINITIONS below, and a suite may name one of them as its default.
Setups provide environment variables and model defaults that can be reused across multiple suites.

CLI examples:
    pytest tests/integration --suite=responses --setup=gpt
    pytest tests/integration --suite=vision --setup=ollama-vision
    pytest tests/integration --suite=base --setup=vllm
"""
from pathlib import Path
from pydantic import BaseModel, Field
this_dir = Path(__file__).parent
default_roots = [
class Suite(BaseModel):
    """A named selection of integration tests.

    A suite only describes *what* to collect; model and environment
    configuration is supplied separately by a setup.
    """

    # Identifier of the suite (the same string used as its key in SUITE_DEFINITIONS).
    name: str
    # Directories or files that test collection is restricted to.
    roots: list[str]
    # Name of a setup from SETUP_DEFINITIONS to use by default; None when the suite has none.
    default_setup: str | None = Field(default=None)
class Setup(BaseModel):
    """Reusable test configuration: environment variables plus CLI option defaults."""

    # Identifier of the setup.
    name: str
    # Human-readable summary of what the setup configures.
    description: str
    # Default values for CLI options, keyed by option name (e.g. "text_model").
    defaults: dict[str, str] = Field(default_factory=dict)
    # Environment variables that belong to the setup (e.g. "OLLAMA_URL").
    env: dict[str, str] = Field(default_factory=dict)
# Global setups, keyed by setup name. Technically any setup can be combined with any suite,
# but in practice some setups only work for specific suites (for example, "ollama-vision"
# supplies a vision model and no text model).
# Each entry's `name` is kept identical to its dictionary key.
SETUP_DEFINITIONS: dict[str, Setup] = {
    "ollama": Setup(
        name="ollama",
        description="Local Ollama provider with text + safety models",
        env={
            "OLLAMA_URL": "http://0.0.0.0:11434",
        },
        defaults={
            "text_model": "ollama/llama3.2:3b-instruct-fp16",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
            "safety_model": "ollama/llama-guard3:1b",
            "safety_shield": "llama-guard",
        },
    ),
    "ollama-vision": Setup(
        # Was name="ollama", which collided with the text setup above and
        # disagreed with this entry's key.
        name="ollama-vision",
        description="Local Ollama provider with a vision model",
        env={
            "OLLAMA_URL": "http://0.0.0.0:11434",
        },
        defaults={
            "vision_model": "ollama/llama3.2-vision:11b",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
    "vllm": Setup(
        name="vllm",
        description="vLLM provider with a text model",
        env={
            "VLLM_URL": "http://localhost:8000/v1",
        },
        defaults={
            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
    # No env entries here: this setup only overrides model defaults.
    "gpt": Setup(
        name="gpt",
        description="OpenAI GPT models for high-quality responses and tool calling",
        defaults={
            "text_model": "openai/gpt-4o",
            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        },
    ),
}
# Names of entries directly under this_dir that are left out of base_roots.
_EXCLUDED_ROOT_DIRS = (
    "__pycache__",
    "fixtures",
    "test_cases",
    "recordings",
    "responses",
    "post_training",
)

# String paths of every remaining subdirectory directly under this_dir.
base_roots = [
    str(entry)
    for entry in this_dir.glob("*")
    if entry.is_dir() and entry.name not in _EXCLUDED_ROOT_DIRS
]
SUITE_DEFINITIONS: dict[str, dict] = {
"base": {
"description": "Base suite that includes most tests but runs them with a text Ollama model",
"roots": default_roots,
"defaults": {
"text_model": "ollama/llama3.2:3b-instruct-fp16",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"responses": {
"description": "Suite that includes only the OpenAI Responses tests; needs a strong tool-calling model",
"roots": ["tests/integration/responses"],
"defaults": {
"text_model": "openai/gpt-4o",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"vision": {
"description": "Suite that includes only the vision tests",
"roots": ["tests/integration/inference/test_vision_inference.py"],
"defaults": {
"vision_model": "ollama/llama3.2-vision:11b",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
# Registry of suites keyed by suite name. Each key is taken from the Suite's own
# `name`, so the two cannot drift apart.
SUITE_DEFINITIONS: dict[str, Suite] = {
    suite.name: suite
    for suite in (
        Suite(
            name="base",
            roots=base_roots,
            default_setup="ollama",
        ),
        Suite(
            name="responses",
            roots=["tests/integration/responses"],
            default_setup="gpt",
        ),
        Suite(
            name="vision",
            roots=["tests/integration/inference/test_vision_inference.py"],
            default_setup="ollama-vision",
        ),
    )
}