From a8aa815b6a194fad76e4e5f7e73faf588b5d0e01 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 9 Sep 2025 15:50:56 -0700 Subject: [PATCH 1/6] feat(tests): migrate to global "setups" system for test configuration (#3390) This PR refactors the integration test system to use global "setups" which provides better separation of concerns: **suites = what to test, setups = how to configure.** NOTE: if you have naming suggestions, please provide feedback Changes: - New global, reusable setup configurations (ollama, vllm, gpt, claude) defined in `tests/integration/suites.py` - Modified `scripts/integration-tests.sh` options to match the underlying pytest options - Updated documentation to reflect the new global setup system The main benefit is that setups can be reused across multiple suites (e.g., use "gpt" with any suite) even though sometimes they could be specifically tailored for a suite (vision <> ollama-vision). It is now easier to add new configurations without modifying existing suites. Usage examples: - `pytest tests/integration --suite=responses --setup=gpt` - `pytest tests/integration --suite=vision` # auto-selects "ollama-vision" setup - `pytest tests/integration --suite=base --setup=vllm` --- .../actions/run-and-record-tests/action.yml | 42 +++--- .github/actions/setup-ollama/action.yml | 4 +- .../actions/setup-test-environment/action.yml | 14 +- .github/workflows/integration-tests.yml | 20 +-- .../workflows/record-integration-tests.yml | 32 ++--- scripts/get_setup_env.py | 71 ++++++++++ scripts/github/schedule-record-workflow.sh | 57 ++++---- scripts/integration-tests.sh | 106 ++++++++------- tests/integration/README.md | 48 ++++--- tests/integration/conftest.py | 71 ++++++---- tests/integration/suites.py | 126 +++++++++++++----- 11 files changed, 385 insertions(+), 206 deletions(-) create mode 100644 scripts/get_setup_env.py diff --git a/.github/actions/run-and-record-tests/action.yml b/.github/actions/run-and-record-tests/action.yml index 7f028b104..a3eb31d9f 
100644 --- a/.github/actions/run-and-record-tests/action.yml +++ b/.github/actions/run-and-record-tests/action.yml @@ -5,21 +5,22 @@ inputs: stack-config: description: 'Stack configuration to use' required: true - provider: - description: 'Provider to use for tests' - required: true + setup: + description: 'Setup to use for tests (e.g., ollama, gpt, vllm)' + required: false + default: '' inference-mode: description: 'Inference mode (record or replay)' required: true - test-suite: + suite: description: 'Test suite to use: base, responses, vision, etc.' required: false default: '' - test-subdirs: - description: 'Comma-separated list of test subdirectories to run; overrides test-suite' + subdirs: + description: 'Comma-separated list of test subdirectories to run; overrides suite' required: false default: '' - test-pattern: + pattern: description: 'Regex pattern to pass to pytest -k' required: false default: '' @@ -37,14 +38,23 @@ runs: - name: Run Integration Tests shell: bash run: | - uv run --no-sync ./scripts/integration-tests.sh \ - --stack-config '${{ inputs.stack-config }}' \ - --provider '${{ inputs.provider }}' \ - --test-subdirs '${{ inputs.test-subdirs }}' \ - --test-pattern '${{ inputs.test-pattern }}' \ - --inference-mode '${{ inputs.inference-mode }}' \ - --test-suite '${{ inputs.test-suite }}' \ - | tee pytest-${{ inputs.inference-mode }}.log + SCRIPT_ARGS="--stack-config ${{ inputs.stack-config }} --inference-mode ${{ inputs.inference-mode }}" + + # Add optional arguments only if they are provided + if [ -n '${{ inputs.setup }}' ]; then + SCRIPT_ARGS="$SCRIPT_ARGS --setup ${{ inputs.setup }}" + fi + if [ -n '${{ inputs.suite }}' ]; then + SCRIPT_ARGS="$SCRIPT_ARGS --suite ${{ inputs.suite }}" + fi + if [ -n '${{ inputs.subdirs }}' ]; then + SCRIPT_ARGS="$SCRIPT_ARGS --subdirs ${{ inputs.subdirs }}" + fi + if [ -n '${{ inputs.pattern }}' ]; then + SCRIPT_ARGS="$SCRIPT_ARGS --pattern ${{ inputs.pattern }}" + fi + + uv run --no-sync 
./scripts/integration-tests.sh $SCRIPT_ARGS | tee pytest-${{ inputs.inference-mode }}.log - name: Commit and push recordings @@ -58,7 +68,7 @@ runs: echo "New recordings detected, committing and pushing" git add tests/integration/recordings/ - git commit -m "Recordings update from CI (test-suite: ${{ inputs.test-suite }})" + git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})" git fetch origin ${{ github.ref_name }} git rebase origin/${{ github.ref_name }} echo "Rebased successfully" diff --git a/.github/actions/setup-ollama/action.yml b/.github/actions/setup-ollama/action.yml index dc2f87e8c..5c95d131d 100644 --- a/.github/actions/setup-ollama/action.yml +++ b/.github/actions/setup-ollama/action.yml @@ -1,7 +1,7 @@ name: Setup Ollama description: Start Ollama inputs: - test-suite: + suite: description: 'Test suite to use: base, responses, vision, etc.' required: false default: '' @@ -11,7 +11,7 @@ runs: - name: Start Ollama shell: bash run: | - if [ "${{ inputs.test-suite }}" == "vision" ]; then + if [ "${{ inputs.suite }}" == "vision" ]; then image="ollama-with-vision-model" else image="ollama-with-models" diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 3be76f009..478e8f598 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -8,11 +8,11 @@ inputs: client-version: description: 'Client version (latest or published)' required: true - provider: - description: 'Provider to setup (ollama or vllm)' - required: true + setup: + description: 'Setup to configure (ollama, vllm, gpt, etc.)' + required: false default: 'ollama' - test-suite: + suite: description: 'Test suite to use: base, responses, vision, etc.' 
required: false default: '' @@ -30,13 +30,13 @@ runs: client-version: ${{ inputs.client-version }} - name: Setup ollama - if: ${{ inputs.provider == 'ollama' && inputs.inference-mode == 'record' }} + if: ${{ (inputs.setup == 'ollama' || inputs.setup == 'ollama-vision') && inputs.inference-mode == 'record' }} uses: ./.github/actions/setup-ollama with: - test-suite: ${{ inputs.test-suite }} + suite: ${{ inputs.suite }} - name: Setup vllm - if: ${{ inputs.provider == 'vllm' && inputs.inference-mode == 'record' }} + if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }} uses: ./.github/actions/setup-vllm - name: Build Llama Stack diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index bb53eea2f..711eccd9e 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -28,8 +28,8 @@ on: description: 'Test against both the latest and published versions' type: boolean default: false - test-provider: - description: 'Test against a specific provider' + test-setup: + description: 'Test against a specific setup' type: string default: 'ollama' @@ -42,18 +42,18 @@ jobs: run-replay-mode-tests: runs-on: ubuntu-latest - name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.provider, matrix.python-version, matrix.client-version, matrix.test-suite) }} + name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.setup, matrix.python-version, matrix.client-version, matrix.suite) }} strategy: fail-fast: false matrix: client-type: [library, server] - # Use vllm on weekly schedule, otherwise use test-provider input (defaults to ollama) - provider: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-provider || 'ollama')) }} + # Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama) + setup: ${{ 
(github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }} # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} - test-suite: [base, vision] + suite: [base, vision] steps: - name: Checkout repository @@ -64,14 +64,14 @@ jobs: with: python-version: ${{ matrix.python-version }} client-version: ${{ matrix.client-version }} - provider: ${{ matrix.provider }} - test-suite: ${{ matrix.test-suite }} + setup: ${{ matrix.setup }} + suite: ${{ matrix.suite }} inference-mode: 'replay' - name: Run tests uses: ./.github/actions/run-and-record-tests with: stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }} - provider: ${{ matrix.provider }} + setup: ${{ matrix.setup }} inference-mode: 'replay' - test-suite: ${{ matrix.test-suite }} + suite: ${{ matrix.suite }} diff --git a/.github/workflows/record-integration-tests.yml b/.github/workflows/record-integration-tests.yml index 01797a54b..65a04f125 100644 --- a/.github/workflows/record-integration-tests.yml +++ b/.github/workflows/record-integration-tests.yml @@ -10,19 +10,19 @@ run-name: Run the integration test suite from tests/integration on: workflow_dispatch: inputs: - test-provider: - description: 'Test against a specific provider' + test-setup: + description: 'Test against a specific setup' type: string default: 'ollama' - test-suite: + suite: description: 'Test suite to use: base, responses, vision, etc.' 
type: string default: '' - test-subdirs: - description: 'Comma-separated list of test subdirectories to run; overrides test-suite' + subdirs: + description: 'Comma-separated list of test subdirectories to run; overrides suite' type: string default: '' - test-pattern: + pattern: description: 'Regex pattern to pass to pytest -k' type: string default: '' @@ -39,10 +39,10 @@ jobs: run: | echo "::group::Workflow Inputs" echo "branch: ${{ github.ref_name }}" - echo "test-provider: ${{ inputs.test-provider }}" - echo "test-suite: ${{ inputs.test-suite }}" - echo "test-subdirs: ${{ inputs.test-subdirs }}" - echo "test-pattern: ${{ inputs.test-pattern }}" + echo "test-setup: ${{ inputs.test-setup }}" + echo "suite: ${{ inputs.suite }}" + echo "subdirs: ${{ inputs.subdirs }}" + echo "pattern: ${{ inputs.pattern }}" echo "::endgroup::" - name: Checkout repository @@ -55,16 +55,16 @@ jobs: with: python-version: "3.12" # Use single Python version for recording client-version: "latest" - provider: ${{ inputs.test-provider || 'ollama' }} - test-suite: ${{ inputs.test-suite }} + setup: ${{ inputs.test-setup || 'ollama' }} + suite: ${{ inputs.suite }} inference-mode: 'record' - name: Run and record tests uses: ./.github/actions/run-and-record-tests with: stack-config: 'server:ci-tests' # recording must be done with server since more tests are run - provider: ${{ inputs.test-provider || 'ollama' }} + setup: ${{ inputs.test-setup || 'ollama' }} inference-mode: 'record' - test-suite: ${{ inputs.test-suite }} - test-subdirs: ${{ inputs.test-subdirs }} - test-pattern: ${{ inputs.test-pattern }} + suite: ${{ inputs.suite }} + subdirs: ${{ inputs.subdirs }} + pattern: ${{ inputs.pattern }} diff --git a/scripts/get_setup_env.py b/scripts/get_setup_env.py new file mode 100644 index 000000000..fad601e76 --- /dev/null +++ b/scripts/get_setup_env.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +""" +Small helper script to extract environment variables from a test setup. +Used by integration-tests.sh to set environment variables before starting the server. +""" + +import argparse +import sys + +from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS + + +def get_setup_env_vars(setup_name, suite_name=None): + """ + Get environment variables for a setup, with optional suite default fallback. + + Args: + setup_name: Name of the setup (e.g., 'ollama', 'gpt') + suite_name: Optional suite name to get default setup if setup_name is None + + Returns: + Dictionary of environment variables + """ + # If no setup specified, try to get default from suite + if not setup_name and suite_name: + suite = SUITE_DEFINITIONS.get(suite_name) + if suite and suite.default_setup: + setup_name = suite.default_setup + + if not setup_name: + return {} + + setup = SETUP_DEFINITIONS.get(setup_name) + if not setup: + print( + f"Error: Unknown setup '{setup_name}'. 
Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}", + file=sys.stderr, + ) + sys.exit(1) + + return setup.env + + +def main(): + parser = argparse.ArgumentParser(description="Extract environment variables from a test setup") + parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)") + parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided") + parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)") + + args = parser.parse_args() + + env_vars = get_setup_env_vars(args.setup, args.suite) + + if args.format == "bash": + # Output as bash export statements + for key, value in env_vars.items(): + print(f"export {key}='{value}'") + elif args.format == "json": + import json + + print(json.dumps(env_vars)) + + +if __name__ == "__main__": + main() diff --git a/scripts/github/schedule-record-workflow.sh b/scripts/github/schedule-record-workflow.sh index 09e055611..c292e53e6 100755 --- a/scripts/github/schedule-record-workflow.sh +++ b/scripts/github/schedule-record-workflow.sh @@ -14,7 +14,7 @@ set -euo pipefail # Default values BRANCH="" TEST_SUBDIRS="" -TEST_PROVIDER="ollama" +TEST_SETUP="ollama" TEST_SUITE="base" TEST_PATTERN="" @@ -27,24 +27,24 @@ Trigger the integration test recording workflow remotely. This way you do not ne OPTIONS: -b, --branch BRANCH Branch to run the workflow on (defaults to current branch) - -p, --test-provider PROVIDER Test provider to use: vllm or ollama (default: ollama) - -t, --test-suite SUITE Test suite to use: base, responses, vision, etc. (default: base) - -s, --test-subdirs DIRS Comma-separated list of test subdirectories to run (overrides suite) - -k, --test-pattern PATTERN Regex pattern to pass to pytest -k + -t, --suite SUITE Test suite to use: base, responses, vision, etc. (default: base) + -p, --setup SETUP Test setup to use: vllm, ollama, gpt, etc. 
(default: ollama) + -s, --subdirs DIRS Comma-separated list of test subdirectories to run (overrides suite) + -k, --pattern PATTERN Regex pattern to pass to pytest -k -h, --help Show this help message EXAMPLES: # Record tests for current branch with agents subdirectory - $0 --test-subdirs "agents" + $0 --subdirs "agents" # Record tests for specific branch with vision tests - $0 -b my-feature-branch --test-suite vision + $0 -b my-feature-branch --suite vision - # Record multiple test subdirectories with specific provider - $0 --test-subdirs "agents,inference" --test-provider vllm + # Record multiple test subdirectories with specific setup + $0 --subdirs "agents,inference" --setup vllm # Record tests matching a specific pattern - $0 --test-subdirs "inference" --test-pattern "test_streaming" + $0 --subdirs "inference" --pattern "test_streaming" EOF } @@ -63,19 +63,19 @@ while [[ $# -gt 0 ]]; do BRANCH="$2" shift 2 ;; - -s|--test-subdirs) + -s|--subdirs) TEST_SUBDIRS="$2" shift 2 ;; - -p|--test-provider) - TEST_PROVIDER="$2" + -p|--setup) + TEST_SETUP="$2" shift 2 ;; - -t|--test-suite) + -t|--suite) TEST_SUITE="$2" shift 2 ;; - -k|--test-pattern) + -k|--pattern) TEST_PATTERN="$2" shift 2 ;; @@ -93,21 +93,16 @@ done # Validate required parameters if [[ -z "$TEST_SUBDIRS" && -z "$TEST_SUITE" ]]; then - echo "Error: --test-subdirs or --test-suite is required" + echo "Error: --subdirs or --suite is required" echo "Please specify which test subdirectories to run or test suite to use, e.g.:" - echo " $0 --test-subdirs \"agents,inference\"" - echo " $0 --test-suite vision" + echo " $0 --subdirs \"agents,inference\"" + echo " $0 --suite vision" echo "" exit 1 fi -# Validate test provider -if [[ "$TEST_PROVIDER" != "vllm" && "$TEST_PROVIDER" != "ollama" ]]; then - echo "❌ Error: Invalid test provider '$TEST_PROVIDER'" - echo " Supported providers: vllm, ollama" - echo " Example: $0 --test-subdirs \"agents\" --test-provider vllm" - exit 1 -fi +# Validate test setup (optional - 
setups are validated by the workflow itself) +# Common setups: ollama, vllm, gpt, etc. # Check if required tools are installed if ! command -v gh &> /dev/null; then @@ -237,7 +232,7 @@ fi # Build the workflow dispatch command echo "Triggering integration test recording workflow..." echo "Branch: $BRANCH" -echo "Test provider: $TEST_PROVIDER" +echo "Test setup: $TEST_SETUP" echo "Test subdirs: $TEST_SUBDIRS" echo "Test suite: $TEST_SUITE" echo "Test pattern: ${TEST_PATTERN:-"(none)"}" @@ -245,16 +240,16 @@ echo "" # Prepare inputs for gh workflow run if [[ -n "$TEST_SUBDIRS" ]]; then - INPUTS="-f test-subdirs='$TEST_SUBDIRS'" + INPUTS="-f subdirs='$TEST_SUBDIRS'" fi -if [[ -n "$TEST_PROVIDER" ]]; then - INPUTS="$INPUTS -f test-provider='$TEST_PROVIDER'" +if [[ -n "$TEST_SETUP" ]]; then + INPUTS="$INPUTS -f test-setup='$TEST_SETUP'" fi if [[ -n "$TEST_SUITE" ]]; then - INPUTS="$INPUTS -f test-suite='$TEST_SUITE'" + INPUTS="$INPUTS -f suite='$TEST_SUITE'" fi if [[ -n "$TEST_PATTERN" ]]; then - INPUTS="$INPUTS -f test-pattern='$TEST_PATTERN'" + INPUTS="$INPUTS -f pattern='$TEST_PATTERN'" fi # Run the workflow diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index ab7e37579..eee60951d 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -13,10 +13,10 @@ set -euo pipefail # Default values STACK_CONFIG="" -PROVIDER="" +TEST_SUITE="base" +TEST_SETUP="" TEST_SUBDIRS="" TEST_PATTERN="" -TEST_SUITE="base" INFERENCE_MODE="replay" EXTRA_PARAMS="" @@ -27,29 +27,30 @@ Usage: $0 [OPTIONS] Options: --stack-config STRING Stack configuration to use (required) - --provider STRING Provider to use (ollama, vllm, etc.) 
(required) - --test-suite STRING Comma-separated list of test suites to run (default: 'base') + --suite STRING Test suite to run (default: 'base') + --setup STRING Test setup (models, env) to use (e.g., 'ollama', 'ollama-vision', 'gpt', 'vllm') --inference-mode STRING Inference mode: record or replay (default: replay) - --test-subdirs STRING Comma-separated list of test subdirectories to run (overrides suite) - --test-pattern STRING Regex pattern to pass to pytest -k + --subdirs STRING Comma-separated list of test subdirectories to run (overrides suite) + --pattern STRING Regex pattern to pass to pytest -k --help Show this help message -Suites are defined in tests/integration/suites.py. They are used to narrow the collection of tests and provide default model options. +Suites are defined in tests/integration/suites.py and define which tests to run. +Setups are defined in tests/integration/setups.py and provide global configuration (models, env). You can also specify subdirectories (of tests/integration) to select tests from, which will override the suite. 
Examples: # Basic inference tests with ollama - $0 --stack-config server:ci-tests --provider ollama + $0 --stack-config server:ci-tests --suite base --setup ollama # Multiple test directories with vllm - $0 --stack-config server:ci-tests --provider vllm --test-subdirs 'inference,agents' + $0 --stack-config server:ci-tests --subdirs 'inference,agents' --setup vllm # Vision tests with ollama - $0 --stack-config server:ci-tests --provider ollama --test-suite vision + $0 --stack-config server:ci-tests --suite vision # default setup for this suite is ollama-vision # Record mode for updating test recordings - $0 --stack-config server:ci-tests --provider ollama --inference-mode record + $0 --stack-config server:ci-tests --suite base --inference-mode record EOF } @@ -60,15 +61,15 @@ while [[ $# -gt 0 ]]; do STACK_CONFIG="$2" shift 2 ;; - --provider) - PROVIDER="$2" + --setup) + TEST_SETUP="$2" shift 2 ;; - --test-subdirs) + --subdirs) TEST_SUBDIRS="$2" shift 2 ;; - --test-suite) + --suite) TEST_SUITE="$2" shift 2 ;; @@ -76,7 +77,7 @@ while [[ $# -gt 0 ]]; do INFERENCE_MODE="$2" shift 2 ;; - --test-pattern) + --pattern) TEST_PATTERN="$2" shift 2 ;; @@ -96,11 +97,13 @@ done # Validate required parameters if [[ -z "$STACK_CONFIG" ]]; then echo "Error: --stack-config is required" + usage exit 1 fi -if [[ -z "$PROVIDER" ]]; then - echo "Error: --provider is required" +if [[ -z "$TEST_SETUP" && -n "$TEST_SUBDIRS" ]]; then + echo "Error: --test-setup is required when --test-subdirs is provided" + usage exit 1 fi @@ -111,7 +114,7 @@ fi echo "=== Llama Stack Integration Test Runner ===" echo "Stack Config: $STACK_CONFIG" -echo "Provider: $PROVIDER" +echo "Setup: $TEST_SETUP" echo "Inference Mode: $INFERENCE_MODE" echo "Test Suite: $TEST_SUITE" echo "Test Subdirs: $TEST_SUBDIRS" @@ -129,21 +132,25 @@ echo "" # Set environment variables export LLAMA_STACK_CLIENT_TIMEOUT=300 -export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE" - -# Configure provider-specific settings -if [[ 
"$PROVIDER" == "ollama" ]]; then - export OLLAMA_URL="http://0.0.0.0:11434" - export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16" - export SAFETY_MODEL="ollama/llama-guard3:1b" - EXTRA_PARAMS="--safety-shield=llama-guard" -else - export VLLM_URL="http://localhost:8000/v1" - export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct" - EXTRA_PARAMS="" -fi THIS_DIR=$(dirname "$0") + +if [[ -n "$TEST_SETUP" ]]; then + EXTRA_PARAMS="--setup=$TEST_SETUP" +fi + +# Apply setup-specific environment variables (needed for server startup and tests) +echo "=== Applying Setup Environment Variables ===" + +# the server needs this +export LLAMA_STACK_TEST_INFERENCE_MODE="$INFERENCE_MODE" + +SETUP_ENV=$(PYTHONPATH=$THIS_DIR/.. python "$THIS_DIR/get_setup_env.py" --suite "$TEST_SUITE" --setup "$TEST_SETUP" --format bash) +echo "Setting up environment variables:" +echo "$SETUP_ENV" +eval "$SETUP_ENV" +echo "" + ROOT_DIR="$THIS_DIR/.." cd $ROOT_DIR @@ -162,6 +169,18 @@ fi # Start Llama Stack Server if needed if [[ "$STACK_CONFIG" == *"server:"* ]]; then + stop_server() { + echo "Stopping Llama Stack Server..." + pids=$(lsof -i :8321 | awk 'NR>1 {print $2}') + if [[ -n "$pids" ]]; then + echo "Killing Llama Stack Server processes: $pids" + kill -9 $pids + else + echo "No Llama Stack Server processes found ?!" 
+ fi + echo "Llama Stack Server stopped" + } + # check if server is already running if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then echo "Llama Stack Server is already running, skipping start" @@ -185,14 +204,16 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then done echo "" fi + + trap stop_server EXIT ERR INT TERM fi # Run tests echo "=== Running Integration Tests ===" EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag" -# Additional exclusions for vllm provider -if [[ "$PROVIDER" == "vllm" ]]; then +# Additional exclusions for vllm setup +if [[ "$TEST_SETUP" == "vllm" ]]; then EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls" fi @@ -229,20 +250,22 @@ if [[ -n "$TEST_SUBDIRS" ]]; then echo "Total test files: $(echo $TEST_FILES | wc -w)" PYTEST_TARGET="$TEST_FILES" - EXTRA_PARAMS="$EXTRA_PARAMS --text-model=$TEXT_MODEL --embedding-model=sentence-transformers/all-MiniLM-L6-v2" else PYTEST_TARGET="tests/integration/" EXTRA_PARAMS="$EXTRA_PARAMS --suite=$TEST_SUITE" fi set +e +set -x pytest -s -v $PYTEST_TARGET \ --stack-config="$STACK_CONFIG" \ + --inference-mode="$INFERENCE_MODE" \ -k "$PYTEST_PATTERN" \ $EXTRA_PARAMS \ --color=yes \ --capture=tee-sys exit_code=$? +set +x set -e if [ $exit_code -eq 0 ]; then @@ -260,18 +283,5 @@ echo "=== System Resources After Tests ===" free -h 2>/dev/null || echo "free command not available" df -h -# stop server -if [[ "$STACK_CONFIG" == *"server:"* ]]; then - echo "Stopping Llama Stack Server..." - pids=$(lsof -i :8321 | awk 'NR>1 {print $2}') - if [[ -n "$pids" ]]; then - echo "Killing Llama Stack Server processes: $pids" - kill -9 $pids - else - echo "No Llama Stack Server processes found ?!" 
- fi - echo "Llama Stack Server stopped" -fi - echo "" echo "=== Integration Tests Complete ===" diff --git a/tests/integration/README.md b/tests/integration/README.md index b05beeb98..467f97e02 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -6,9 +6,7 @@ Integration tests verify complete workflows across different providers using Lla ```bash # Run all integration tests with existing recordings -LLAMA_STACK_TEST_INFERENCE_MODE=replay \ - LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \ - uv run --group test \ +uv run --group test \ pytest -sv tests/integration/ --stack-config=starter ``` @@ -42,25 +40,35 @@ Model parameters can be influenced by the following options: Each of these are comma-separated lists and can be used to generate multiple parameter combinations. Note that tests will be skipped if no model is specified. -### Suites (fast selection + sane defaults) +### Suites and Setups -- `--suite`: comma-separated list of named suites that both narrow which tests are collected and prefill common model options (unless you pass them explicitly). +- `--suite`: single named suite that narrows which tests are collected. - Available suites: - - `responses`: collects tests under `tests/integration/responses`; this is a separate suite because it needs a strong tool-calling model. - - `vision`: collects only `tests/integration/inference/test_vision_inference.py`; defaults `--vision-model=ollama/llama3.2-vision:11b`, `--embedding-model=sentence-transformers/all-MiniLM-L6-v2`. -- Explicit flags always win. For example, `--suite=responses --text-model=` overrides the suite’s text model. + - `base`: collects most tests (excludes responses and post_training) + - `responses`: collects tests under `tests/integration/responses` (needs strong tool-calling models) + - `vision`: collects only `tests/integration/inference/test_vision_inference.py` +- `--setup`: global configuration that can be used with any suite. 
Setups prefill model/env defaults; explicit CLI flags always win. + - Available setups: + - `ollama`: Local Ollama provider with lightweight models (sets OLLAMA_URL, uses llama3.2:3b-instruct-fp16) + - `vllm`: VLLM provider for efficient local inference (sets VLLM_URL, uses Llama-3.2-1B-Instruct) + - `gpt`: OpenAI GPT models for high-quality responses (uses gpt-4o) + - `claude`: Anthropic Claude models for high-quality responses (uses claude-3-5-sonnet) -Examples: +Examples ```bash -# Fast responses run with defaults -pytest -s -v tests/integration --stack-config=server:starter --suite=responses +# Fast responses run with a strong tool-calling model +pytest -s -v tests/integration --stack-config=server:starter --suite=responses --setup=gpt -# Fast single-file vision run with defaults -pytest -s -v tests/integration --stack-config=server:starter --suite=vision +# Fast single-file vision run with Ollama defaults +pytest -s -v tests/integration --stack-config=server:starter --suite=vision --setup=ollama -# Combine suites and override a default -pytest -s -v tests/integration --stack-config=server:starter --suite=responses,vision --embedding-model=text-embedding-3-small +# Base suite with VLLM for performance +pytest -s -v tests/integration --stack-config=server:starter --suite=base --setup=vllm + +# Override a default from setup +pytest -s -v tests/integration --stack-config=server:starter \ + --suite=responses --setup=gpt --embedding-model=text-embedding-3-small ``` ## Examples @@ -127,14 +135,13 @@ pytest tests/integration/ ### RECORD Mode Captures API interactions for later replay: ```bash -LLAMA_STACK_TEST_INFERENCE_MODE=record \ -pytest tests/integration/inference/test_new_feature.py +pytest tests/integration/inference/test_new_feature.py --inference-mode=record ``` ### LIVE Mode Tests make real API calls (but not recorded): ```bash -LLAMA_STACK_TEST_INFERENCE_MODE=live pytest tests/integration/ +pytest tests/integration/ --inference-mode=live ``` By default, the 
recording directory is `tests/integration/recordings`. You can override this by setting the `LLAMA_STACK_TEST_RECORDING_DIR` environment variable. @@ -155,15 +162,14 @@ cat recordings/responses/abc123.json | jq '.' #### Remote Re-recording (Recommended) Use the automated workflow script for easier re-recording: ```bash -./scripts/github/schedule-record-workflow.sh --test-subdirs "inference,agents" +./scripts/github/schedule-record-workflow.sh --subdirs "inference,agents" ``` See the [main testing guide](../README.md#remote-re-recording-recommended) for full details. #### Local Re-recording ```bash # Re-record specific tests -LLAMA_STACK_TEST_INFERENCE_MODE=record \ -pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py +pytest -s -v --stack-config=server:starter tests/integration/inference/test_modified.py --inference-mode=record ``` Note that when re-recording tests, you must use a Stack pointing to a server (i.e., `server:starter`). This subtlety exists because the set of tests run in server are a superset of the set of tests run in the library client. 
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 96260fdb7..4735264c3 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -15,7 +15,7 @@ from dotenv import load_dotenv from llama_stack.log import get_logger -from .suites import SUITE_DEFINITIONS +from .suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS logger = get_logger(__name__, category="tests") @@ -63,19 +63,33 @@ def pytest_configure(config): key, value = env_var.split("=", 1) os.environ[key] = value - suites_raw = config.getoption("--suite") - suites: list[str] = [] - if suites_raw: - suites = [p.strip() for p in str(suites_raw).split(",") if p.strip()] - unknown = [p for p in suites if p not in SUITE_DEFINITIONS] - if unknown: + inference_mode = config.getoption("--inference-mode") + os.environ["LLAMA_STACK_TEST_INFERENCE_MODE"] = inference_mode + + suite = config.getoption("--suite") + if suite: + if suite not in SUITE_DEFINITIONS: + raise pytest.UsageError(f"Unknown suite: {suite}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}") + + # Apply setups (global parameterizations): env + defaults + setup = config.getoption("--setup") + if suite and not setup: + setup = SUITE_DEFINITIONS[suite].default_setup + + if setup: + if setup not in SETUP_DEFINITIONS: raise pytest.UsageError( - f"Unknown suite(s): {', '.join(unknown)}. Available: {', '.join(sorted(SUITE_DEFINITIONS.keys()))}" + f"Unknown setup '{setup}'. 
Available: {', '.join(sorted(SETUP_DEFINITIONS.keys()))}" ) - for suite in suites: - suite_def = SUITE_DEFINITIONS.get(suite, {}) - defaults: dict = suite_def.get("defaults", {}) - for dest, value in defaults.items(): + + setup_obj = SETUP_DEFINITIONS[setup] + logger.info(f"Applying setup '{setup}'{' for suite ' + suite if suite else ''}") + # Apply env first + for k, v in setup_obj.env.items(): + if k not in os.environ: + os.environ[k] = str(v) + # Apply defaults if not provided explicitly + for dest, value in setup_obj.defaults.items(): current = getattr(config.option, dest, None) if not current: setattr(config.option, dest, value) @@ -120,6 +134,13 @@ def pytest_addoption(parser): default=384, help="Output dimensionality of the embedding model to use for testing. Default: 384", ) + + parser.addoption( + "--inference-mode", + help="Inference mode: { record, replay, live } (default: replay)", + choices=["record", "replay", "live"], + default="replay", + ) parser.addoption( "--report", help="Path where the test report should be written, e.g. --report=/path/to/report.md", @@ -127,14 +148,18 @@ def pytest_addoption(parser): available_suites = ", ".join(sorted(SUITE_DEFINITIONS.keys())) suite_help = ( - "Comma-separated integration test suites to narrow collection and prefill defaults. " - "Available: " - f"{available_suites}. " - "Explicit CLI flags (e.g., --text-model) override suite defaults. " - "Examples: --suite=responses or --suite=responses,vision." + f"Single test suite to run (narrows collection). Available: {available_suites}. Example: --suite=responses" ) parser.addoption("--suite", help=suite_help) + # Global setups for any suite + available_setups = ", ".join(sorted(SETUP_DEFINITIONS.keys())) + setup_help = ( + f"Global test setup configuration. Available: {available_setups}. " + "Can be used with any suite. 
Example: --setup=ollama" + ) + parser.addoption("--setup", help=setup_help) + MODEL_SHORT_IDS = { "meta-llama/Llama-3.2-3B-Instruct": "3B", @@ -221,16 +246,12 @@ pytest_plugins = ["tests.integration.fixtures.common"] def pytest_ignore_collect(path: str, config: pytest.Config) -> bool: """Skip collecting paths outside the selected suite roots for speed.""" - suites_raw = config.getoption("--suite") - if not suites_raw: + suite = config.getoption("--suite") + if not suite: return False - names = [p.strip() for p in str(suites_raw).split(",") if p.strip()] - roots: list[str] = [] - for name in names: - suite_def = SUITE_DEFINITIONS.get(name) - if suite_def: - roots.extend(suite_def.get("roots", [])) + sobj = SUITE_DEFINITIONS.get(suite) + roots: list[str] = sobj.get("roots", []) if isinstance(sobj, dict) else getattr(sobj, "roots", []) if not roots: return False diff --git a/tests/integration/suites.py b/tests/integration/suites.py index 602855055..bacd7ef52 100644 --- a/tests/integration/suites.py +++ b/tests/integration/suites.py @@ -8,46 +8,112 @@ # For example: # # ```bash -# pytest tests/integration/ --suite=vision +# pytest tests/integration/ --suite=vision --setup=ollama # ``` # -# Each suite can: -# - restrict collection to specific roots (dirs or files) -# - provide default CLI option values (e.g. text_model, embedding_model, etc.) +""" +Each suite defines what to run (roots). Suites can be run with different global setups defined in SETUP_DEFINITIONS below. +Setups provide environment variables and model defaults that can be reused across multiple suites. 
+ +CLI examples: + pytest tests/integration --suite=responses --setup=gpt + pytest tests/integration --suite=vision --setup=ollama + pytest tests/integration --suite=base --setup=vllm +""" from pathlib import Path +from pydantic import BaseModel, Field + this_dir = Path(__file__).parent -default_roots = [ + + +class Suite(BaseModel): + name: str + roots: list[str] + default_setup: str | None = None + + +class Setup(BaseModel): + """A reusable test configuration with environment and CLI defaults.""" + + name: str + description: str + defaults: dict[str, str] = Field(default_factory=dict) + env: dict[str, str] = Field(default_factory=dict) + + +# Global setups - can be used with any suite "technically" but in reality, some setups might work +# only for specific test suites. +SETUP_DEFINITIONS: dict[str, Setup] = { + "ollama": Setup( + name="ollama", + description="Local Ollama provider with text + safety models", + env={ + "OLLAMA_URL": "http://0.0.0.0:11434", + "SAFETY_MODEL": "ollama/llama-guard3:1b", + }, + defaults={ + "text_model": "ollama/llama3.2:3b-instruct-fp16", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + "safety_model": "ollama/llama-guard3:1b", + "safety_shield": "llama-guard", + }, + ), + "ollama-vision": Setup( + name="ollama", + description="Local Ollama provider with a vision model", + env={ + "OLLAMA_URL": "http://0.0.0.0:11434", + }, + defaults={ + "vision_model": "ollama/llama3.2-vision:11b", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + }, + ), + "vllm": Setup( + name="vllm", + description="vLLM provider with a text model", + env={ + "VLLM_URL": "http://localhost:8000/v1", + }, + defaults={ + "text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct", + "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", + }, + ), + "gpt": Setup( + name="gpt", + description="OpenAI GPT models for high-quality responses and tool calling", + defaults={ + "text_model": "openai/gpt-4o", + "embedding_model": 
"sentence-transformers/all-MiniLM-L6-v2", + }, + ), +} + + +base_roots = [ str(p) for p in this_dir.glob("*") if p.is_dir() and p.name not in ("__pycache__", "fixtures", "test_cases", "recordings", "responses", "post_training") ] -SUITE_DEFINITIONS: dict[str, dict] = { - "base": { - "description": "Base suite that includes most tests but runs them with a text Ollama model", - "roots": default_roots, - "defaults": { - "text_model": "ollama/llama3.2:3b-instruct-fp16", - "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", - }, - }, - "responses": { - "description": "Suite that includes only the OpenAI Responses tests; needs a strong tool-calling model", - "roots": ["tests/integration/responses"], - "defaults": { - "text_model": "openai/gpt-4o", - "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", - }, - }, - "vision": { - "description": "Suite that includes only the vision tests", - "roots": ["tests/integration/inference/test_vision_inference.py"], - "defaults": { - "vision_model": "ollama/llama3.2-vision:11b", - "embedding_model": "sentence-transformers/all-MiniLM-L6-v2", - }, - }, +SUITE_DEFINITIONS: dict[str, Suite] = { + "base": Suite( + name="base", + roots=base_roots, + default_setup="ollama", + ), + "responses": Suite( + name="responses", + roots=["tests/integration/responses"], + default_setup="gpt", + ), + "vision": Suite( + name="vision", + roots=["tests/integration/inference/test_vision_inference.py"], + default_setup="ollama-vision", + ), } From 9d3a234bf3772083e148d8168a204b9cb2c200ac Mon Sep 17 00:00:00 2001 From: ehhuang Date: Tue, 9 Sep 2025 15:51:20 -0700 Subject: [PATCH 2/6] chore: remove unused variable (#3389) # What does this PR do? 
## Test Plan --- llama_stack/core/library_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llama_stack/core/library_client.py b/llama_stack/core/library_client.py index 9e7a8006c..ea5a2ac8e 100644 --- a/llama_stack/core/library_client.py +++ b/llama_stack/core/library_client.py @@ -10,7 +10,6 @@ import json import logging # allow-direct-logging import os import sys -from concurrent.futures import ThreadPoolExecutor from enum import Enum from io import BytesIO from pathlib import Path @@ -148,7 +147,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient): self.async_client = AsyncLlamaStackAsLibraryClient( config_path_or_distro_name, custom_provider_registry, provider_data, skip_logger_removal ) - self.pool_executor = ThreadPoolExecutor(max_workers=4) self.provider_data = provider_data self.loop = asyncio.new_event_loop() From dd1f946b3ee4232dc8e13d3836e7f19e65f5e112 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Tue, 9 Sep 2025 18:54:58 -0400 Subject: [PATCH 3/6] feat: include a default inference store during llama stack build (#3373) # What does this PR do? 
enables completions storage when using `llama stack build --providers` - - GET /v1/chat/completions - GET /v1/chat/completions/{id} todo: llama stack build and distro codegen should use the same code paths ## Test Plan ci --- llama_stack/cli/stack/_build.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index c6e204773..b14e6fe55 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -45,6 +45,7 @@ from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.exec import formulate_run_args, run_command from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.providers.datatypes import Api +from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig DISTRIBS_PATH = Path(__file__).parent.parent.parent / "distributions" @@ -294,6 +295,12 @@ def _generate_run_config( if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, ) + if not run_config.inference_store: + run_config.inference_store = SqliteSqlStoreConfig( + **SqliteSqlStoreConfig.sample_run_config( + __distro_dir__=(DISTRIBS_BASE_DIR / image_name).as_posix(), db_name="inference_store.db" + ) + ) # build providers dict provider_registry = get_provider_registry(build_config) for api in apis: From 81ad240faa48d2a2d91e5fbfc3dda21443432a6f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 9 Sep 2025 23:00:50 -0700 Subject: [PATCH 4/6] fix(k8s): unwedge run.yaml to add files --- .../k8s-benchmark/stack-configmap.yaml | 19 +- .../k8s-benchmark/stack_run_config.yaml | 9 + .../distributions/k8s/stack-configmap.yaml | 182 +++++------------- .../distributions/k8s/stack_run_config.yaml | 9 + 4 files changed, 77 insertions(+), 142 deletions(-) diff --git a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml b/docs/source/distributions/k8s-benchmark/stack-configmap.yaml index edf4ebd75..bf6109b68 100644 --- 
a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml +++ b/docs/source/distributions/k8s-benchmark/stack-configmap.yaml @@ -6,6 +6,7 @@ data: apis: - agents - inference + - files - safety - telemetry - tool_runtime @@ -19,13 +20,6 @@ data: max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: vllm-safety - provider_type: remote::vllm - config: - url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} @@ -41,6 +35,14 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard @@ -111,9 +113,6 @@ data: - model_id: ${env.INFERENCE_MODEL} provider_id: vllm-inference model_type: llm - - model_id: ${env.SAFETY_MODEL} - provider_id: vllm-safety - model_type: llm shields: - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} vector_dbs: [] diff --git a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml b/docs/source/distributions/k8s-benchmark/stack_run_config.yaml index 5a810639e..f8ff7811b 100644 --- a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml +++ b/docs/source/distributions/k8s-benchmark/stack_run_config.yaml @@ -3,6 +3,7 @@ image_name: kubernetes-benchmark-demo apis: - agents - inference +- files - safety - telemetry - tool_runtime @@ -31,6 +32,14 @@ providers: db: ${env.POSTGRES_DB:=llamastack} user: 
${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard diff --git a/docs/source/distributions/k8s/stack-configmap.yaml b/docs/source/distributions/k8s/stack-configmap.yaml index 4f95554e3..3dbb0da97 100644 --- a/docs/source/distributions/k8s/stack-configmap.yaml +++ b/docs/source/distributions/k8s/stack-configmap.yaml @@ -1,137 +1,55 @@ apiVersion: v1 data: - stack_run_config.yaml: | - version: '2' - image_name: kubernetes-demo - apis: - - agents - - inference - - safety - - telemetry - - tool_runtime - - vector_io - providers: - inference: - - provider_id: vllm-inference - provider_type: remote::vllm - config: - url: ${env.VLLM_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: vllm-safety - provider_type: remote::vllm - config: - url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - api_token: ${env.VLLM_API_TOKEN:=fake} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - vector_io: - - provider_id: ${env.ENABLE_CHROMADB:+chromadb} - provider_type: remote::chromadb - config: - url: ${env.CHROMADB_URL:=} - kvstore: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - 
excluded_categories: [] - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" - sinks: ${env.TELEMETRY_SINKS:=console} - tool_runtime: - - provider_id: brave-search - provider_type: remote::brave-search - config: - api_key: ${env.BRAVE_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: tavily-search - provider_type: remote::tavily-search - config: - api_key: ${env.TAVILY_SEARCH_API_KEY:+} - max_results: 3 - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - models: - - metadata: - embedding_dimension: 384 - model_id: all-MiniLM-L6-v2 - provider_id: sentence-transformers - model_type: embedding - - metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: vllm-inference - model_type: 
llm - - metadata: {} - model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} - provider_id: vllm-safety - model_type: llm - shields: - - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} - vector_dbs: [] - datasets: [] - scoring_fns: [] - benchmarks: [] - tool_groups: - - toolgroup_id: builtin::websearch - provider_id: tavily-search - - toolgroup_id: builtin::rag - provider_id: rag-runtime - server: - port: 8321 - auth: - provider_config: - type: github_token + stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- + inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n + \ inference:\n - provider_id: vllm-inference\n provider_type: remote::vllm\n + \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: + ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: + ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: + remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n + \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n + \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n + \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n + \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n + \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n + \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n + \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n + \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: + meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: + ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n + \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + \ \n safety:\n - 
provider_id: llama-guard\n provider_type: inline::llama-guard\n + \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n + \ provider_type: inline::meta-reference\n config:\n persistence_store:\n + \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: + ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: + ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n + \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n + \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n + \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: + ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n + \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n + \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n + \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: + 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: + {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n + \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n + \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: + ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n + \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: + ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n + \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- + metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n 
provider_id: + sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n + \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: + ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n + \ model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: + []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: + builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n + \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n + \ type: github_token\n" kind: ConfigMap metadata: creationTimestamp: null diff --git a/docs/source/distributions/k8s/stack_run_config.yaml b/docs/source/distributions/k8s/stack_run_config.yaml index a2d65e1a9..b841ab977 100644 --- a/docs/source/distributions/k8s/stack_run_config.yaml +++ b/docs/source/distributions/k8s/stack_run_config.yaml @@ -3,6 +3,7 @@ image_name: kubernetes-demo apis: - agents - inference +- files - safety - telemetry - tool_runtime @@ -38,6 +39,14 @@ providers: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db safety: - provider_id: llama-guard provider_type: inline::llama-guard From 1c23aeb9372fa3e1286a7d6d8210994000efae6d Mon Sep 17 00:00:00 2001 From: Cesare Pompeiano Date: Wed, 10 Sep 2025 11:19:21 +0200 Subject: [PATCH 5/6] feat: Add vector_db_id to chunk metadata (#3304) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? 
When running RAG in a multi vector DB setting, it can be difficult to trace where retrieved chunks originate from. This PR adds the `vector_db_id` into each chunk’s metadata, making it easier to understand which database a given chunk came from. This is helpful for debugging and for analyzing retrieval behavior of multiple DBs. Relevant code: ```python for vector_db_id, result in zip(vector_db_ids, results): for chunk, score in zip(result.chunks, result.scores): if not hasattr(chunk, "metadata") or chunk.metadata is None: chunk.metadata = {} chunk.metadata["vector_db_id"] = vector_db_id chunks.append(chunk) scores.append(score) ``` ## Test Plan * Ran Llama Stack in debug mode. * Verified that `vector_db_id` was added to each chunk’s metadata. * Confirmed that the metadata was printed in the console when using the RAG tool. --------- Co-authored-by: are-ces Co-authored-by: Francisco Arceo --- .../inline/tool_runtime/rag/memory.py | 16 +++++- tests/unit/rag/test_rag_query.py | 55 +++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py index cb526e8ee..aa629cca8 100644 --- a/llama_stack/providers/inline/tool_runtime/rag/memory.py +++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py @@ -167,8 +167,18 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti for vector_db_id in vector_db_ids ] results: list[QueryChunksResponse] = await asyncio.gather(*tasks) - chunks = [c for r in results for c in r.chunks] - scores = [s for r in results for s in r.scores] + + chunks = [] + scores = [] + + for vector_db_id, result in zip(vector_db_ids, results, strict=False): + for chunk, score in zip(result.chunks, result.scores, strict=False): + if not hasattr(chunk, "metadata") or chunk.metadata is None: + chunk.metadata = {} + chunk.metadata["vector_db_id"] = vector_db_id + + chunks.append(chunk) + 
scores.append(score) if not chunks: return RAGQueryResult(content=None) @@ -203,6 +213,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti metadata_keys_to_exclude_from_context = [ "token_count", "metadata_token_count", + "vector_db_id", ] metadata_for_context = {} for k in chunk_metadata_keys_to_include_from_context: @@ -227,6 +238,7 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti "document_ids": [c.metadata["document_id"] for c in chunks[: len(picked)]], "chunks": [c.content for c in chunks[: len(picked)]], "scores": scores[: len(picked)], + "vector_db_ids": [c.metadata["vector_db_id"] for c in chunks[: len(picked)]], }, ) diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index d18d90716..7b897bfe0 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -81,3 +81,58 @@ class TestRagQuery: # Test that invalid mode raises an error with pytest.raises(ValueError): RAGQueryConfig(mode="wrong_mode") + + @pytest.mark.asyncio + async def test_query_adds_vector_db_id_to_chunk_metadata(self): + rag_tool = MemoryToolRuntimeImpl( + config=MagicMock(), + vector_io_api=MagicMock(), + inference_api=MagicMock(), + ) + + vector_db_ids = ["db1", "db2"] + + # Fake chunks from each DB + chunk_metadata1 = ChunkMetadata( + document_id="doc1", + chunk_id="chunk1", + source="test_source1", + metadata_token_count=5, + ) + chunk1 = Chunk( + content="chunk from db1", + metadata={"vector_db_id": "db1", "document_id": "doc1"}, + stored_chunk_id="c1", + chunk_metadata=chunk_metadata1, + ) + + chunk_metadata2 = ChunkMetadata( + document_id="doc2", + chunk_id="chunk2", + source="test_source2", + metadata_token_count=5, + ) + chunk2 = Chunk( + content="chunk from db2", + metadata={"vector_db_id": "db2", "document_id": "doc2"}, + stored_chunk_id="c2", + chunk_metadata=chunk_metadata2, + ) + + rag_tool.vector_io_api.query_chunks = AsyncMock( + side_effect=[ + 
QueryChunksResponse(chunks=[chunk1], scores=[0.9]), + QueryChunksResponse(chunks=[chunk2], scores=[0.8]), + ] + ) + + result = await rag_tool.query(content="test", vector_db_ids=vector_db_ids) + returned_chunks = result.metadata["chunks"] + returned_scores = result.metadata["scores"] + returned_doc_ids = result.metadata["document_ids"] + returned_vector_db_ids = result.metadata["vector_db_ids"] + + assert returned_chunks == ["chunk from db1", "chunk from db2"] + assert returned_scores == (0.9, 0.8) + assert returned_doc_ids == ["doc1", "doc2"] + assert returned_vector_db_ids == ["db1", "db2"] From 167143131053c8de6ea620a83ebdec41c0b24e50 Mon Sep 17 00:00:00 2001 From: Akram Ben Aissi Date: Wed, 10 Sep 2025 12:55:57 +0200 Subject: [PATCH 6/6] fix: Add missing files_api parameter to MemoryToolRuntimeImpl test (#3394) # What does this PR do? The test_query_adds_vector_db_id_to_chunk_metadata test was failing because MemoryToolRuntimeImpl.__init__() now requires a files_api parameter. Fixes failing unit tests for Python 3.12 and 3.13. ## Test Plan --- tests/unit/rag/test_rag_query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index 7b897bfe0..183b4d049 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -88,6 +88,7 @@ class TestRagQuery: config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock(), + files_api=MagicMock(), ) vector_db_ids = ["db1", "db2"]