feat(tests): migrate to global "setups" system for test configuration (#3390)

This PR refactors the integration test system to use global "setups" which provides better separation of concerns: **suites = what to test, setups = how to configure.** NOTE: if you have naming suggestions, please provide feedback. Changes: - New `tests/integration/setups.py` with global, reusable configurations (ollama, vllm, gpt, claude) - Modified `scripts/integration-tests.sh` options to match the underlying pytest options - Updated documentation to reflect the new global setup system The main benefit is that setups can be reused across multiple suites (e.g., use "gpt" with any suite) even though sometimes they could be specifically tailored for a suite (vision <> ollama-vision). It is now easier to add new configurations without modifying existing suites. Usage examples: - `pytest tests/integration --suite=responses --setup=gpt` - `pytest tests/integration --suite=vision` # auto-selects "ollama-vision" setup - `pytest tests/integration --suite=base --setup=vllm`
2025-12-03 09:53:45 +00:00 · 2025-09-09 15:50:56 -07:00 · 2025-09-09 15:50:56 -07:00 · a8aa815b6a
commit a8aa815b6a
parent 28696c3f30
11 changed files with 385 additions and 206 deletions
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@ -8,46 +8,112 @@
 # For example:
 #
 # ```bash
-# pytest tests/integration/ --suite=vision
+# pytest tests/integration/ --suite=vision --setup=ollama-vision
 # ```
 #
-# Each suite can:
-# - restrict collection to specific roots (dirs or files)
-# - provide default CLI option values (e.g. text_model, embedding_model, etc.)
+"""
+Each suite defines what to run (roots). Suites can be run with different global setups defined in SETUP_DEFINITIONS below.
+Setups provide environment variables and model defaults that can be reused across multiple suites.
+
+CLI examples:
+  pytest tests/integration --suite=responses --setup=gpt
+  pytest tests/integration --suite=vision --setup=ollama-vision
+  pytest tests/integration --suite=base --setup=vllm
+"""

 from pathlib import Path

+from pydantic import BaseModel, Field
+
 this_dir = Path(__file__).parent
-default_roots = [
+
+
+class Suite(BaseModel):
+    """A named selection of integration tests: it says what to run, not how to configure it."""
+
+    # Identifier of the suite; every entry in SUITE_DEFINITIONS uses its dict key here.
+    name: str
+    # Collection roots (directories or individual test files) that make up the suite.
+    roots: list[str]
+    # Name of a setup in SETUP_DEFINITIONS, or None when the suite has no default.
+    # NOTE(review): presumably applied when no --setup is given; the lookup is not in this file.
+    default_setup: str | None = None
+
+
+class Setup(BaseModel):
+    """A reusable test configuration with environment and CLI defaults."""
+
+    # Identifier of the setup.
+    name: str
+    # Short human-readable summary of what the setup provides.
+    description: str
+    # Default values for CLI options (e.g. text_model, embedding_model); empty when omitted.
+    defaults: dict[str, str] = Field(default_factory=dict)
+    # Environment variables associated with the setup; empty when omitted.
+    # NOTE(review): how and when these are exported is decided by the consumer, not shown here.
+    env: dict[str, str] = Field(default_factory=dict)
+
+
+# Global setups, keyed by setup name. They can be used with any suite "technically" but in reality,
+# some setups might work only for specific test suites (e.g. "ollama-vision" only provides a vision model).
+# Keep each entry's `name` identical to its dictionary key.
+SETUP_DEFINITIONS: dict[str, Setup] = {
+    "ollama": Setup(
+        name="ollama",
+        description="Local Ollama provider with text + safety models",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "SAFETY_MODEL": "ollama/llama-guard3:1b",
+        },
+        defaults={
+            "text_model": "ollama/llama3.2:3b-instruct-fp16",
+            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+            "safety_model": "ollama/llama-guard3:1b",
+            "safety_shield": "llama-guard",
+        },
+    ),
+    "ollama-vision": Setup(
+        name="ollama-vision",
+        description="Local Ollama provider with a vision model",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+        },
+        defaults={
+            "vision_model": "ollama/llama3.2-vision:11b",
+            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+        },
+    ),
+    "vllm": Setup(
+        name="vllm",
+        description="vLLM provider with a text model",
+        env={
+            "VLLM_URL": "http://localhost:8000/v1",
+        },
+        defaults={
+            "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
+            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+        },
+    ),
+    "gpt": Setup(
+        name="gpt",
+        description="OpenAI GPT models for high-quality responses and tool calling",
+        defaults={
+            "text_model": "openai/gpt-4o",
+            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+        },
+    ),
+}
+
+
+base_roots = [
    str(p)
    for p in this_dir.glob("*")
    if p.is_dir()
    and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "post_training")
 ]

-SUITE_DEFINITIONS: dict[str, dict] = {
-    "base": {
-        "description": "Base suite that includes most tests but runs them with a text Ollama model",
-        "roots": default_roots,
-        "defaults": {
-            "text_model": "ollama/llama3.2:3b-instruct-fp16",
-            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
-        },
-    },
-    "responses": {
-        "description": "Suite that includes only the OpenAI Responses tests; needs a strong tool-calling model",
-        "roots": ["tests/integration/responses"],
-        "defaults": {
-            "text_model": "openai/gpt-4o",
-            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
-        },
-    },
-    "vision": {
-        "description": "Suite that includes only the vision tests",
-        "roots": ["tests/integration/inference/test_vision_inference.py"],
-        "defaults": {
-            "vision_model": "ollama/llama3.2-vision:11b",
-            "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
-        },
-    },
+# Suites keyed by suite name. Each entry lists the roots to collect and names a default setup
+# from SETUP_DEFINITIONS.
+SUITE_DEFINITIONS: dict[str, Suite] = {
+    "base": Suite(
+        name="base",
+        roots=base_roots,
+        default_setup="ollama",
+    ),
+    "responses": Suite(
+        name="responses",
+        roots=["tests/integration/responses"],
+        default_setup="gpt",
+    ),
+    "vision": Suite(
+        name="vision",
+        roots=["tests/integration/inference/test_vision_inference.py"],
+        default_setup="ollama-vision",
+    ),
 }