Merge branch 'main' into qdrant-keyword-search

Francisco Arceo 2025-09-09 19:07:28 -06:00 committed by GitHub
commit 401d455809
19 changed files with 411 additions and 229 deletions

View file

@ -5,21 +5,22 @@ inputs:
stack-config:
description: 'Stack configuration to use'
required: true
provider:
description: 'Provider to use for tests'
required: true
setup:
description: 'Setup to use for tests (e.g., ollama, gpt, vllm)'
required: false
default: ''
inference-mode:
description: 'Inference mode (record or replay)'
required: true
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
test-subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides suite'
required: false
default: ''
test-pattern:
pattern:
description: 'Regex pattern to pass to pytest -k'
required: false
default: ''
@ -37,14 +38,23 @@ runs:
- name: Run Integration Tests
shell: bash
run: |
uv run --no-sync ./scripts/integration-tests.sh \
--stack-config '${{ inputs.stack-config }}' \
--provider '${{ inputs.provider }}' \
--test-subdirs '${{ inputs.test-subdirs }}' \
--test-pattern '${{ inputs.test-pattern }}' \
--inference-mode '${{ inputs.inference-mode }}' \
--test-suite '${{ inputs.test-suite }}' \
| tee pytest-${{ inputs.inference-mode }}.log
SCRIPT_ARGS="--stack-config ${{ inputs.stack-config }} --inference-mode ${{ inputs.inference-mode }}"
# Add optional arguments only if they are provided
if [ -n '${{ inputs.setup }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --setup ${{ inputs.setup }}"
fi
if [ -n '${{ inputs.suite }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --suite ${{ inputs.suite }}"
fi
if [ -n '${{ inputs.subdirs }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --subdirs ${{ inputs.subdirs }}"
fi
if [ -n '${{ inputs.pattern }}' ]; then
SCRIPT_ARGS="$SCRIPT_ARGS --pattern ${{ inputs.pattern }}"
fi
uv run --no-sync ./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-${{ inputs.inference-mode }}.log
- name: Commit and push recordings
@ -58,7 +68,7 @@ runs:
echo "New recordings detected, committing and pushing"
git add tests/integration/recordings/
git commit -m "Recordings update from CI (test-suite: ${{ inputs.test-suite }})"
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
git fetch origin ${{ github.ref_name }}
git rebase origin/${{ github.ref_name }}
echo "Rebased successfully"

View file

@ -1,7 +1,7 @@
name: Setup Ollama
description: Start Ollama
inputs:
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
@ -11,7 +11,7 @@ runs:
- name: Start Ollama
shell: bash
run: |
if [ "${{ inputs.test-suite }}" == "vision" ]; then
if [ "${{ inputs.suite }}" == "vision" ]; then
image="ollama-with-vision-model"
else
image="ollama-with-models"

View file

@ -8,11 +8,11 @@ inputs:
client-version:
description: 'Client version (latest or published)'
required: true
provider:
description: 'Provider to setup (ollama or vllm)'
required: true
setup:
description: 'Setup to configure (ollama, vllm, gpt, etc.)'
required: false
default: 'ollama'
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
required: false
default: ''
@ -30,13 +30,13 @@ runs:
client-version: ${{ inputs.client-version }}
- name: Setup ollama
if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }}
if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-ollama
with:
test-suite: ${{ inputs.test-suite }}
suite: ${{ inputs.suite }}
- name: Setup vllm
if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }}
if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Build Llama Stack

View file

@ -28,8 +28,8 @@ on:
description: 'Test against both the latest and published versions'
type: boolean
default: false
test-provider:
description: 'Test against a specific provider'
test-setup:
description: 'Test against a specific setup'
type: string
default: 'ollama'
@ -42,18 +42,18 @@ jobs:
run-replay-mode-tests:
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.test-suite) }}
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }}
strategy:
fail-fast: false
matrix:
client-type: [library, server]
# Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama)
provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }}
# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
test-suite: [base, vision]
suite: [base, vision]
steps:
- name: Checkout repository
@ -64,14 +64,14 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
client-version: ${{ matrix.client-version }}
provider: ${{ matrix.provider }}
test-suite: ${{ matrix.test-suite }}
setup: ${{ matrix.setup }}
suite: ${{ matrix.suite }}
inference-mode: 'replay'
- name: Run tests
uses: ./.github/actions/run-and-record-tests
with:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
provider: ${{ matrix.provider }}
setup: ${{ matrix.setup }}
inference-mode: 'replay'
test-suite: ${{ matrix.test-suite }}
suite: ${{ matrix.suite }}

View file

@ -10,19 +10,19 @@ run-name: Run the integration test suite from tests/integration
on:
workflow_dispatch:
inputs:
test-provider:
description: 'Test against a specific provider'
test-setup:
description: 'Test against a specific setup'
type: string
default: 'ollama'
test-suite:
suite:
description: 'Test suite to use: base, responses, vision, etc.'
type: string
default: ''
test-subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides test-suite'
subdirs:
description: 'Comma-separated list of test subdirectories to run; overrides suite'
type: string
default: ''
test-pattern:
pattern:
description: 'Regex pattern to pass to pytest -k'
type: string
default: ''
@ -39,10 +39,10 @@ jobs:
run: |
echo "::group::Workflow Inputs"
echo "branch: ${{ github.ref_name }}"
echo "test-provider: ${{ inputs.test-provider }}"
echo "test-suite: ${{ inputs.test-suite }}"
echo "test-subdirs: ${{ inputs.test-subdirs }}"
echo "test-pattern: ${{ inputs.test-pattern }}"
echo "test-setup: ${{ inputs.test-setup }}"
echo "suite: ${{ inputs.suite }}"
echo "subdirs: ${{ inputs.subdirs }}"
echo "pattern: ${{ inputs.pattern }}"
echo "::endgroup::"
- name: Checkout repository
@ -55,16 +55,16 @@ jobs:
with:
python-version: "3.12" # Use single Python version for recording
client-version: "latest"
provider: ${{ inputs.test-provider || 'ollama' }}
test-suite: ${{ inputs.test-suite }}
setup: ${{ inputs.test-setup || 'ollama' }}
suite: ${{ inputs.suite }}
inference-mode: 'record'
- name: Run and record tests
uses: ./.github/actions/run-and-record-tests
with:
stack-config: 'server:ci-tests' # recording must be done with server since more tests are run
provider: ${{ inputs.test-provider || 'ollama' }}
setup: ${{ inputs.test-setup || 'ollama' }}
inference-mode: 'record'
test-suite: ${{ inputs.test-suite }}
test-subdirs: ${{ inputs.test-subdirs }}
test-pattern: ${{ inputs.test-pattern }}
suite: ${{ inputs.suite }}
subdirs: ${{ inputs.subdirs }}
pattern: ${{ inputs.pattern }}

View file

@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.core.utils.exec import formulate_run_args, run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.providers.datatypes import Api
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions"
@ -294,6 +295,12 @@ def _generate_run_config(
if build_config.external_providers_dir
else EXTERNAL_PROVIDERS_DIR,
)
if not run_config.inference_store:
run_config.inference_store = SqliteSqlStoreConfig(
**SqliteSqlStoreConfig.sample_run_config(
__distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db"
)
)
# build providers dict
provider_registry = get_provider_registry(build_config)
for api in apis:

View file

@ -10,7 +10,6 @@ import json
import logging # allow-direct-logging
import os
import sys
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
from io import BytesIO
from pathlib import Path
@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
self.async_client = AsyncLlamaStackAsLibraryClient(
config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal
)
self.pool_executor = ThreadPoolExecutor(max_workers=4)
self.provider_data = provider_data
self.loop = asyncio.new_event_loop()

View file

@ -30,7 +30,7 @@ def available_providers() -> list[ProviderSpec]:
adapter=AdapterSpec(
adapter_type="huggingface",
pip_packages=[
"datasets",
"datasets>=4.0.0",
],
module="llama_stack.providers.remote.datasetio.huggingface",
config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig",
@ -42,7 +42,7 @@ def available_providers() -> list[ProviderSpec]:
adapter=AdapterSpec(
adapter_type="nvidia",
pip_packages=[
"datasets",
"datasets>=4.0.0",
],
module="llama_stack.providers.remote.datasetio.nvidia",
config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig",

View file

@ -48,7 +48,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.post_training,
provider_type="inline::huggingface-gpu",
pip_packages=["trl", "transformers", "peft", "datasets", "torch"],
pip_packages=["trl", "transformers", "peft", "datasets>=4.0.0", "torch"],
module="llama_stack.providers.inline.post_training.huggingface",
config_class="llama_stack.providers.inline.post_training.huggingface.HuggingFacePostTrainingConfig",
api_dependencies=[

View file

@ -18,7 +18,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.20",
"llama-stack-client": "^0.2.21",
"lucide-react": "^0.542.0",
"next": "15.3.3",
"next-auth": "^4.24.11",
@ -10278,9 +10278,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.2.20",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.20.tgz",
"integrity": "sha512-1vD5nizTX5JEW8TADxKgy/P1W8YZoPSpdnmfxbdYbWgpQ3BWtbvLS6jmDk7VwVA5fRC4895VfHsRDfS1liHarw==",
"version": "0.2.21",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.21.tgz",
"integrity": "sha512-rjU2Vx5xStxDYavU8K1An/SYXiQQjroLcK98B+p0Paz/a7OgRao2S0YwvThJjPUyChY4fO03UIXP9LpmHqlXWQ==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",

View file

@ -23,7 +23,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.20",
"llama-stack-client": "^0.2.21",
"lucide-react": "^0.542.0",
"next": "15.3.3",
"next-auth": "^4.24.11",

View file

@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
version = "0.2.20"
version = "0.2.21"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@ -31,7 +31,7 @@ dependencies = [
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.20",
"llama-stack-client>=0.2.21",
"openai>=1.99.6",
"prompt-toolkit",
"python-dotenv",
@ -55,7 +55,7 @@ dependencies = [
ui = [
"streamlit",
"pandas",
"llama-stack-client>=0.2.20",
"llama-stack-client>=0.2.21",
"streamlit-option-menu",
]
@ -114,7 +114,7 @@ test = [
"psycopg2-binary>=2.9.0",
"pypdf",
"mcp",
"datasets",
"datasets>=4.0.0",
"autoevals",
"transformers",
"sqlalchemy",

71
scripts/get_setup_env.py Normal file
View file

@ -0,0 +1,71 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Small helper script to extract environment variables from a test setup.
Used by integration-tests.sh to set environment variables before starting the server.
"""
import argparse
import sys
from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
def get_setup_env_vars(setup_name, suite_name=None):
"""
Get environment variables for a setup, with optional suite default fallback.
Args:
setup_name: Name of the setup (e.g., 'ollama', 'gpt')
suite_name: Optional suite name to get default setup if setup_name is None
Returns:
Dictionary of environment variables
"""
# If no setup specified, try to get default from suite
if not setup_name and suite_name:
suite = SUITE_DEFINITIONS.get(suite_name)
if suite and suite.default_setup:
setup_name = suite.default_setup
if not setup_name:
return {}
setup = SETUP_DEFINITIONS.get(setup_name)
if not setup:
print(
f"Error: Unknown setup '{setup_name}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}",
file=sys.stderr,
)
sys.exit(1)
return setup.env
def main():
parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
args = parser.parse_args()
env_vars = get_setup_env_vars(args.setup, args.suite)
if args.format == "bash":
# Output as bash export statements
for key, value in env_vars.items():
print(f"export {key}='{value}'")
elif args.format == "json":
import json
print(json.dumps(env_vars))
if __name__ == "__main__":
main()

View file

@ -14,7 +14,7 @@ set -euo pipefail
# Default values
BRANCH=""
TEST_SUBDIRS=""
TEST_PROVIDER="ollama"
TEST_SETUP="ollama"
TEST_SUITE="base"
TEST_PATTERN=""
@ -27,24 +27,24 @@ Trigger the integration test recording workflow remotely. This way you do not ne
OPTIONS:
-b, --branch BRANCH Branch to run the workflow on (defaults to current branch)
-p, --test-provider PROVIDER Test provider to use: vllm or ollama (default: ollama)
-t, --test-suite SUITE Test suite to use: base, responses, vision, etc. (default: base)
-s, --test-subdirs DIRS Comma-separated list of test subdirectories to run (overrides suite)
-k, --test-pattern PATTERN Regex pattern to pass to pytest -k
-t, --suite SUITE Test suite to use: base, responses, vision, etc. (default: base)
-p, --setup SETUP Test setup to use: vllm, ollama, gpt, etc. (default: ollama)
-s, --subdirs DIRS Comma-separated list of test subdirectories to run (overrides suite)
-k, --pattern PATTERN Regex pattern to pass to pytest -k
-h, --help Show this help message
EXAMPLES:
# Record tests for current branch with agents subdirectory
$0 --test-subdirs "agents"
$0 --subdirs "agents"
# Record tests for specific branch with vision tests
$0 -b my-feature-branch --test-suite vision
$0 -b my-feature-branch --suite vision
# Record multiple test subdirectories with specific provider
$0 --test-subdirs "agents,inference" --test-provider vllm
# Record multiple test subdirectories with specific setup
$0 --subdirs "agents,inference" --setup vllm
# Record tests matching a specific pattern
$0 --test-subdirs "inference" --test-pattern "test_streaming"
$0 --subdirs "inference" --pattern "test_streaming"
EOF
}
@ -63,19 +63,19 @@ while [[ $# -gt 0 ]]; do
BRANCH="$2"
shift 2
;;
-s|--test-subdirs)
-s|--subdirs)
TEST_SUBDIRS="$2"
shift 2
;;
-p|--test-provider)
TEST_PROVIDER="$2"
-p|--setup)
TEST_SETUP="$2"
shift 2
;;
-t|--test-suite)
-t|--suite)
TEST_SUITE="$2"
shift 2
;;
-k|--test-pattern)
-k|--pattern)
TEST_PATTERN="$2"
shift 2
;;
@ -93,21 +93,16 @@ done
# Validate required parameters
if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then
echo "Error: --test-subdirs or --test-suite is required"
echo "Error: --subdirs or --suite is required"
echo "Please specify which test subdirectories to run or test suite to use, e.g.:"
echo " $0 --test-subdirs \"agents,inference\""
echo " $0 --test-suite vision"
echo " $0 --subdirs \"agents,inference\""
echo " $0 --suite vision"
echo ""
exit 1
fi
# Validate test provider
if [[ "$TEST_PROVIDER" != "vllm" && "$TEST_PROVIDER" != "ollama" ]]; then
echo "❌ Error: Invalid test provider '$TEST_PROVIDER'"
echo " Supported providers: vllm, ollama"
echo " Example: $0 --test-subdirs \"agents\" --test-provider vllm"
exit 1
fi
# Validate test setup (optional - setups are validated by the workflow itself)
# Common setups: ollama, vllm, gpt, etc.
# Check if required tools are installed
if ! command -v gh &> /dev/null; then
@ -237,7 +232,7 @@ fi
# Build the workflow dispatch command
echo "Triggering integration test recording workflow..."
echo "Branch: $BRANCH"
echo "Test provider: $TEST_PROVIDER"
echo "Test setup: $TEST_SETUP"
echo "Test subdirs: $TEST_SUBDIRS"
echo "Test suite: $TEST_SUITE"
echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
@ -245,16 +240,16 @@ echo ""
# Prepare inputs for gh workflow run
if [[ -n "$TEST_SUBDIRS" ]]; then
INPUTS="-f test-subdirs='$TEST_SUBDIRS'"
INPUTS="-f subdirs='$TEST_SUBDIRS'"
fi
if [[ -n "$TEST_PROVIDER" ]]; then
INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'"
if [[ -n "$TEST_SETUP" ]]; then
INPUTS="$INPUTS -f test-setup='$TEST_SETUP'"
fi
if [[ -n "$TEST_SUITE" ]]; then
INPUTS="$INPUTS -f test-suite='$TEST_SUITE'"
INPUTS="$INPUTS -f suite='$TEST_SUITE'"
fi
if [[ -n "$TEST_PATTERN" ]]; then
INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'"
INPUTS="$INPUTS -f pattern='$TEST_PATTERN'"
fi
# Run the workflow

View file

@ -13,10 +13,10 @@ set -euo pipefail
# Default values
STACK_CONFIG=""
PROVIDER=""
TEST_SUITE="base"
TEST_SETUP=""
TEST_SUBDIRS=""
TEST_PATTERN=""
TEST_SUITE="base"
INFERENCE_MODE="replay"
EXTRA_PARAMS=""
@ -27,29 +27,30 @@ Usage: $0 [OPTIONS]
Options:
--stack-config STRING Stack configuration to use (required)
--provider STRING Provider to use (ollama, vllm, etc.) (required)
--test-suite STRING Comma-separated list of test suites to run (default: 'base')
--suite STRING Test suite to run (default: 'base')
--setup STRING Test setup (models, env) to use (e.g., 'ollama', 'ollama-vision', 'gpt', 'vllm')
--inference-mode STRING Inference mode: record or replay (default: replay)
--test-subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--test-pattern STRING Regex pattern to pass to pytest -k
--subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--pattern STRING Regex pattern to pass to pytest -k
--help Show this help message
Suites are defined in tests/integration/suites.py. They are used to narrow the collection of tests and provide default model options.
Suites are defined in tests/integration/suites.py and select which tests to run.
Setups are also defined in tests/integration/suites.py and provide global configuration (models, env).
You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite.
Examples:
# Basic inference tests with ollama
$0 --stack-config server:ci-tests --provider ollama
$0 --stack-config server:ci-tests --suite base --setup ollama
# Multiple test directories with vllm
$0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents'
$0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm
# Vision tests with ollama
$0 --stack-config server:ci-tests --provider ollama --test-suite vision
$0 --stack-config server:ci-tests --suite vision # default setup for this suite is ollama-vision
# Record mode for updating test recordings
$0 --stack-config server:ci-tests --provider ollama --inference-mode record
$0 --stack-config server:ci-tests --suite base --inference-mode record
EOF
}
@ -60,15 +61,15 @@ while [[ $# -gt 0 ]]; do
STACK_CONFIG="$2"
shift 2
;;
--provider)
PROVIDER="$2"
--setup)
TEST_SETUP="$2"
shift 2
;;
--test-subdirs)
--subdirs)
TEST_SUBDIRS="$2"
shift 2
;;
--test-suite)
--suite)
TEST_SUITE="$2"
shift 2
;;
@ -76,7 +77,7 @@ while [[ $# -gt 0 ]]; do
INFERENCE_MODE="$2"
shift 2
;;
--test-pattern)
--pattern)
TEST_PATTERN="$2"
shift 2
;;
@ -96,11 +97,13 @@ done
# Validate required parameters
if [[ -z "$STACK_CONFIG" ]]; then
echo "Error: --stack-config is required"
usage
exit 1
fi
if [[ -z "$PROVIDER" ]]; then
echo "Error: --provider is required"
if [[ -z "$TEST_SETUP" && -n "$TEST_SUBDIRS" ]]; then
echo "Error: --test-setup is required when --test-subdirs is provided"
usage
exit 1
fi
@ -111,7 +114,7 @@ fi
echo "=== Llama Stack Integration Test Runner ==="
echo "Stack Config: $STACK_CONFIG"
echo "Provider: $PROVIDER"
echo "Setup: $TEST_SETUP"
echo "Inference Mode: $INFERENCE_MODE"
echo "Test Suite: $TEST_SUITE"
echo "Test Subdirs: $TEST_SUBDIRS"
@ -129,21 +132,25 @@ echo ""
# Set environment variables
export LLAMA_STACK_CLIENT_TIMEOUT=300
export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
# Configure provider-specific settings
if [[ "$PROVIDER" == "ollama" ]]; then
export OLLAMA_URL="http://0.0.0.0:11434"
export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
export SAFETY_MODEL="ollama/llama-guard3:1b"
EXTRA_PARAMS="--safety-shield=llama-guard"
else
export VLLM_URL="http://localhost:8000/v1"
export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
EXTRA_PARAMS=""
fi
THIS_DIR=$(dirname "$0")
if [[ -n "$TEST_SETUP" ]]; then
EXTRA_PARAMS="--setup=$TEST_SETUP"
fi
# Apply setup-specific environment variables (needed for server startup and tests)
echo "=== Applying Setup Environment Variables ==="
# the server needs this
export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE"
SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash)
echo "Setting up environment variables:"
echo "$SETUP_ENV"
eval "$SETUP_ENV"
echo ""
ROOT_DIR="$THIS_DIR/.."
cd $ROOT_DIR
@ -162,6 +169,18 @@ fi
# Start Llama Stack Server if needed
if [[ "$STACK_CONFIG" == *"server:"* ]]; then
stop_server() {
echo "Stopping Llama Stack Server..."
pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
if [[ -n "$pids" ]]; then
echo "Killing Llama Stack Server processes: $pids"
kill -9 $pids
else
echo "No Llama Stack Server processes found ?!"
fi
echo "Llama Stack Server stopped"
}
# check if server is already running
if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then
echo "Llama Stack Server is already running, skipping start"
@ -185,14 +204,16 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
done
echo ""
fi
trap stop_server EXIT ERR INT TERM
fi
# Run tests
echo "=== Running Integration Tests ==="
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
# Additional exclusions for vllm provider
if [[ "$PROVIDER" == "vllm" ]]; then
# Additional exclusions for vllm setup
if [[ "$TEST_SETUP" == "vllm" ]]; then
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
fi
@ -229,20 +250,22 @@ if [[ -n "$TEST_SUBDIRS" ]]; then
echo "Total test files: $(echo $TEST_FILES | wc -w)"
PYTEST_TARGET="$TEST_FILES"
EXTRA_PARAMS="$EXTRA_PARAMS --text-model=$TEXT_MODEL --embedding-model=sentence-transformers/all-MiniLM-L6-v2"
else
PYTEST_TARGET="tests/integration/"
EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE"
fi
set +e
set -x
pytest -s -v $PYTEST_TARGET \
--stack-config="$STACK_CONFIG" \
--inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \
$EXTRA_PARAMS \
--color=yes \
--capture=tee-sys
exit_code=$?
set +x
set -e
if [ $exit_code -eq 0 ]; then
@ -260,18 +283,5 @@ echo "=== System Resources After Tests ==="
free -h 2>/dev/null || echo "free command not available"
df -h
# stop server
if [[ "$STACK_CONFIG" == *"server:"* ]]; then
echo "Stopping Llama Stack Server..."
pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
if [[ -n "$pids" ]]; then
echo "Killing Llama Stack Server processes: $pids"
kill -9 $pids
else
echo "No Llama Stack Server processes found ?!"
fi
echo "Llama Stack Server stopped"
fi
echo ""
echo "=== Integration Tests Complete ==="

View file

@ -6,9 +6,7 @@ Integration tests verify complete workflows across different providers using Lla
```bash
# Run all integration tests with existing recordings
LLAMA_STACK_TEST_INFERENCE_MODE=replay \
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
uv run --group test \
uv run --group test \
pytest -sv tests/integration/ --stack-config=starter
```
@ -42,25 +40,35 @@ Model parameters can be influenced by the following options:
Each of these is a comma-separated list and can be used to generate multiple parameter combinations. Note that tests will be skipped
if no model is specified.
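A hedged sketch of how the comma-separated values fan out (the model IDs are illustrative placeholders; the flag names are the ones used throughout this guide):
```bash
# Two text models listed => each selected test runs once per model
pytest -s -v tests/integration --stack-config=server:starter \
  --text-model=ollama/llama3.2:3b-instruct-fp16,openai/gpt-4o \
  --embedding-model=sentence-transformers/all-MiniLM-L6-v2
```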
### Suites (fast selection + sane defaults)
### Suites and Setups
- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly).
- `--suite`: single named suite that narrows which tests are collected.
- Available suites:
- `responses`: collects tests under `tests/integration/responses`; this is a separate suite because it needs a strong tool-calling model.
- `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`.
- Explicit flags always win. For example, `--suite=responses --text-model=<X>` overrides the suite's text model.
- `base`: collects most tests (excludes responses and post_training)
- `responses`: collects tests under `tests/integration/responses` (needs strong tool-calling models)
- `vision`: collects only `tests/integration/inference/test_vision_inference.py`
- `--setup`: global configuration that can be used with any suite. Setups prefill model/env defaults; explicit CLI flags always win.
- Available setups:
- `ollama`: Local Ollama provider with lightweight models (sets OLLAMA_URL, uses llama3.2:3b-instruct-fp16)
- `vllm`: VLLM provider for efficient local inference (sets VLLM_URL, uses Llama-3.2-1B-Instruct)
- `gpt`: OpenAI GPT models for high-quality responses (uses gpt-4o)
- `claude`: Anthropic Claude models for high-quality responses (uses claude-3-5-sonnet)
Examples:
Examples
```bash
# Fast responses run with defaults
pytest -s -v tests/integration --stack-config=server:starter --suite=responses
# Fast responses run with a strong tool-calling model
pytest -s -v tests/integration --stack-config=server:starter --suite=responses --setup=gpt
# Fast single-file vision run with defaults
pytest -s -v tests/integration --stack-config=server:starter --suite=vision
# Fast single-file vision run with Ollama defaults
pytest -s -v tests/integration --stack-config=server:starter --suite=vision --setup=ollama
# Combine suites and override a default
pytest -s -v tests/integration --stack-config=server:starter --suite=responses,vision --embedding-model=text-embedding-3-small
# Base suite with VLLM for performance
pytest -s -v tests/integration --stack-config=server:starter --suite=base --setup=vllm
# Override a default from setup
pytest -s -v tests/integration --stack-config=server:starter \
--suite=responses --setup=gpt --embedding-model=text-embedding-3-small
```
## Examples
@ -127,14 +135,13 @@ pytest tests/integration/
### RECORD Mode
Captures API interactions for later replay:
```bash
LLAMA_STACK_TEST_INFERENCE_MODE=record \
pytest tests/integration/inference/test_new_feature.py
pytest tests/integration/inference/test_new_feature.py --inference-mode=record
```
### LIVE Mode
Tests make real API calls (but not recorded):
```bash
LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/
pytest tests/integration/ --inference-mode=live
```
By default, the recording directory is `tests/integration/recordings`. You can override this by setting the `LLAMA_STACK_TEST_RECORDING_DIR` environment variable.
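For example, a minimal sketch (the target directory is an arbitrary placeholder):
```bash
# Write recordings somewhere other than tests/integration/recordings
LLAMA_STACK_TEST_RECORDING_DIR=/tmp/my-recordings \
  pytest tests/integration/inference/test_new_feature.py --inference-mode=record
```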
@ -155,15 +162,14 @@ cat recordings/responses/abc123.json | jq '.'
#### Remote Re-recording (Recommended)
Use the automated workflow script for easier re-recording:
```bash
./scripts/github/schedule-record-workflow.sh --test-subdirs "inference,agents"
./scripts/github/schedule-record-workflow.sh --subdirs "inference,agents"
```
See the [main testing guide](../README.md#remote-re-recording-recommended) for full details.
#### Local Re-recording
```bash
# Re-record specific tests
LLAMA_STACK_TEST_INFERENCE_MODE=record \
pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py
pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py --inference-mode=record
```
Note that when re-recording tests, you must use a Stack pointing to a server (i.e., `server:starter`). This subtlety exists because the set of tests run in server mode is a superset of the set of tests run in the library client.

View file

@ -15,7 +15,7 @@ from dotenv import load_dotenv
from llama_stack.log import get_logger
from .suites import SUITE_DEFINITIONS
from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
logger = get_logger(__name__, category="tests")
@ -63,19 +63,33 @@ def pytest_configure(config):
key, value = env_var.split("=", 1)
os.environ[key] = value
suites_raw = config.getoption("--suite")
suites: list[str] = []
if suites_raw:
suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
unknown = [p for p in suites if p not in SUITE_DEFINITIONS]
if unknown:
inference_mode = config.getoption("--inference-mode")
os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = inference_mode
suite = config.getoption("--suite")
if suite:
if suite not in SUITE_DEFINITIONS:
raise pytest.UsageError(f"Unknown suite: {suite}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}")
# Apply setups (global parameterizations): env + defaults
setup = config.getoption("--setup")
if suite and not setup:
setup = SUITE_DEFINITIONS[suite].default_setup
if setup:
if setup not in SETUP_DEFINITIONS:
raise pytest.UsageError(
f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}"
f"Unknown setup '{setup}'. Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}"
)
for suite in suites:
suite_def = SUITE_DEFINITIONS.get(suite, {})
defaults: dict = suite_def.get("defaults", {})
for dest, value in defaults.items():
setup_obj = SETUP_DEFINITIONS[setup]
logger.info(f"Applying setup '{setup}'{' for suite ' + suite if suite else ''}")
# Apply env first
for k, v in setup_obj.env.items():
if k not in os.environ:
os.environ[k] = str(v)
# Apply defaults if not provided explicitly
for dest, value in setup_obj.defaults.items():
current = getattr(config.option, dest, None)
if not current:
setattr(config.option, dest, value)
@ -120,6 +134,13 @@ def pytest_addoption(parser):
default=384,
help="Output dimensionality of the embedding model to use for testing. Default: 384",
)
parser.addoption(
"--inference-mode",
help="Inference mode: { record, replay, live } (default: replay)",
choices=["record", "replay", "live"],
default="replay",
)
parser.addoption(
"--report",
help="Path where the test report should be written, e.g. --report=/path/to/report.md",
@ -127,14 +148,18 @@ def pytest_addoption(parser):
available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys()))
suite_help = (
"Comma-separated integration test suites to narrow collection and prefill defaults. "
"Available: "
f"{available_suites}. "
"Explicit CLI flags (e.g., --text-model) override suite defaults. "
"Examples: --suite=responses or --suite=responses,vision."
f"Single test suite to run (narrows collection). Available: {available_suites}. Example: --suite=responses"
)
parser.addoption("--suite", help=suite_help)
# Global setups for any suite
available_setups = ", ".join(sorted(SETUP_DEFINITIONS.keys()))
setup_help = (
f"Global test setup configuration. Available: {available_setups}. "
"Can be used with any suite. Example: --setup=ollama"
)
parser.addoption("--setup", help=setup_help)
MODEL_SHORT_IDS = {
"meta-llama/Llama-3.2-3B-Instruct": "3B",
@ -221,16 +246,12 @@ pytest_plugins = ["tests.integration.fixtures.common"]
def pytest_ignore_collect(path: str, config: pytest.Config) -> bool:
"""Skip collecting paths outside the selected suite roots for speed."""
suites_raw = config.getoption("--suite")
if not suites_raw:
suite = config.getoption("--suite")
if not suite:
return False
names = [p.strip() for p in str(suites_raw).split(",") if p.strip()]
roots: list[str] = []
for name in names:
suite_def = SUITE_DEFINITIONS.get(name)
if suite_def:
roots.extend(suite_def.get("roots", []))
sobj = SUITE_DEFINITIONS.get(suite)
roots: list[str] = sobj.get("roots", []) if isinstance(sobj, dict) else getattr(sobj, "roots", [])
if not roots:
return False

View file

@ -8,46 +8,112 @@
# For example:
#
# ```bash
# pytest tests/integration/ --suite=vision
# pytest tests/integration/ --suite=vision --setup=ollama
# ```
#
# Each suite can:
# - restrict collection to specific roots (dirs or files)
# - provide default CLI option values (e.g. text_model, embedding_model, etc.)
"""
Each suite defines what to run (roots). Suites can be run with different global setups, defined below in this same file.
Setups provide environment variables and model defaults that can be reused across multiple suites.
CLI examples:
pytest tests/integration --suite=responses --setup=gpt
pytest tests/integration --suite=vision --setup=ollama
pytest tests/integration --suite=base --setup=vllm
"""
from pathlib import Path
from pydantic import BaseModel, Field
this_dir = Path(__file__).parent
default_roots = [
class Suite(BaseModel):
name: str
roots: list[str]
default_setup: str | None = None
class Setup(BaseModel):
"""A reusable test configuration with environment and CLI defaults."""
name: str
description: str
defaults: dict[str, str] = Field(default_factory=dict)
env: dict[str, str] = Field(default_factory=dict)
# Global setups - can be used with any suite "technically" but in reality, some setups might work
# only for specific test suites.
SETUP_DEFINITIONS: dict[str, Setup] = {
"ollama": Setup(
name="ollama",
description="Local Ollama provider with text + safety models",
env={
"OLLAMA_URL": "http://0.0.0.0:11434",
"SAFETY_MODEL": "ollama/llama-guard3:1b",
},
defaults={
"text_model": "ollama/llama3.2:3b-instruct-fp16",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
"safety_model": "ollama/llama-guard3:1b",
"safety_shield": "llama-guard",
},
),
"ollama-vision": Setup(
name="ollama",
description="Local Ollama provider with a vision model",
env={
"OLLAMA_URL": "http://0.0.0.0:11434",
},
defaults={
"vision_model": "ollama/llama3.2-vision:11b",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
),
"vllm": Setup(
name="vllm",
description="vLLM provider with a text model",
env={
"VLLM_URL": "http://localhost:8000/v1",
},
defaults={
"text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
),
"gpt": Setup(
name="gpt",
description="OpenAI GPT models for high-quality responses and tool calling",
defaults={
"text_model": "openai/gpt-4o",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
),
}
base_roots = [
str(p)
for p in this_dir.glob("*")
if p.is_dir()
and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "post_training")
]
SUITE_DEFINITIONS: dict[str, dict] = {
"base": {
"description": "Base suite that includes most tests but runs them with a text Ollama model",
"roots": default_roots,
"defaults": {
"text_model": "ollama/llama3.2:3b-instruct-fp16",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"responses": {
"description": "Suite that includes only the OpenAI Responses tests; needs a strong tool-calling model",
"roots": ["tests/integration/responses"],
"defaults": {
"text_model": "openai/gpt-4o",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
"vision": {
"description": "Suite that includes only the vision tests",
"roots": ["tests/integration/inference/test_vision_inference.py"],
"defaults": {
"vision_model": "ollama/llama3.2-vision:11b",
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
},
},
SUITE_DEFINITIONS: dict[str, Suite] = {
"base": Suite(
name="base",
roots=base_roots,
default_setup="ollama",
),
"responses": Suite(
name="responses",
roots=["tests/integration/responses"],
default_setup="gpt",
),
"vision": Suite(
name="vision",
roots=["tests/integration/inference/test_vision_inference.py"],
default_setup="ollama-vision",
),
}

16
uv.lock generated
View file

@ -895,7 +895,6 @@ dependencies = [
{ name = "numpy" },
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5c/f4/7c2136f4660ca504266cc08b38df2aa1db14fea93393b82e099ff34d7290/faiss_cpu-1.11.0.post1.tar.gz", hash = "sha256:06b1ea9ddec9e4d9a41c8ef7478d493b08d770e9a89475056e963081eed757d1", size = 70543, upload-time = "2025-07-15T09:15:02.127Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/30/1e/9980758efa55b4e7a5d6df1ae17c9ddbe5a636bfbf7d22d47c67f7a530f4/faiss_cpu-1.11.0.post1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:68f6ce2d9c510a5765af2f5711bd76c2c37bd598af747f3300224bdccf45378c", size = 7913676, upload-time = "2025-07-15T09:14:06.077Z" },
{ url = "https://files.pythonhosted.org/packages/05/d1/bd785887085faa02916c52320527b8bb54288835b0a3138df89a0e323cc8/faiss_cpu-1.11.0.post1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b940c530a8236cc0b9fd9d6e87b3d70b9c6c216bc2baf2649356c908902e52c9", size = 3313952, upload-time = "2025-07-15T09:14:07.584Z" },
@ -1748,10 +1747,9 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5f/e4/f1546746049c99c6b8b247e2f34485b9eae36faa9322b84e2a17262e6712/litellm-1.74.9-py3-none-any.whl", hash = "sha256:ab8f8a6e4d8689d3c7c4f9c3bbc7e46212cc3ebc74ddd0f3c0c921bb459c9874", size = 8740449, upload-time = "2025-07-28T16:42:36.8Z" },
]
[[package]]
name = "llama-stack"
version = "0.2.20"
version = "0.2.21"
source = { editable = "." }
dependencies = [
{ name = "aiohttp" },
@ -1889,8 +1887,8 @@ requires-dist = [
{ name = "huggingface-hub", specifier = ">=0.34.0,<1.0" },
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "llama-stack-client", specifier = ">=0.2.20" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.20" },
{ name = "llama-stack-client", specifier = ">=0.2.21" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.21" },
{ name = "openai", specifier = ">=1.99.6" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@ -1958,7 +1956,7 @@ test = [
{ name = "aiosqlite" },
{ name = "autoevals" },
{ name = "chardet" },
{ name = "datasets" },
{ name = "datasets", specifier = ">=4.0.0" },
{ name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai", specifier = ">=1.100.0" },
@ -1999,7 +1997,7 @@ unit = [
[[package]]
name = "llama-stack-client"
version = "0.2.20"
version = "0.2.21"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@ -2018,9 +2016,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/21/91/c5e32219a5192825dd601700e68205c815c5cfee60c64c22172e46a0c83e/llama_stack_client-0.2.20.tar.gz", hash = "sha256:356257f0a4bbb64205f89e113d715925853d5e34ec744e72466da72790ba415b", size = 318311, upload-time = "2025-08-29T21:10:12.854Z" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/d3/8c50561d167f1e9b601b8fffe852b44c1ff97aaa6db6cdedd611d9e02a65/llama_stack_client-0.2.21.tar.gz", hash = "sha256:bd931fdcadedec5ccdbaa3c54d0c17761af1c227711ad6150dc0dd33d7b66ce2", size = 318319, upload-time = "2025-09-08T22:26:57.668Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b0/ba/84914c4eead2fd9251c149fd6a7da28b78acd620793e3c4506116645cb60/llama_stack_client-0.2.20-py3-none-any.whl", hash = "sha256:6e178981d2ce971da2145c79d5b2b123fa50e063ed431494975c2ba01c5b8016", size = 369899, upload-time = "2025-08-29T21:10:11.113Z" },
{ url = "https://files.pythonhosted.org/packages/02/77/dadc682046a2c7ad68be8d2d2afac7007bf4d22efb0d3929d85ab9706ffe/llama_stack_client-0.2.21-py3-none-any.whl", hash = "sha256:adba82fdf18ab3b8ac218cedba4927bd5d26c23c2318e75c8763a44bb6b40693", size = 369902, upload-time = "2025-09-08T22:26:56.308Z" },
]
[[package]]