mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
A couple of important updates: (1) When recording tests, we cannot generate a matrix, because the independent recordings would conflict with one another. (2) In fact, we no longer need a matrix over test types at all: the tests themselves are very fast, and the overhead of `llama stack build`, setting up `uv`, etc. is much larger than the time spent running them. (3) Refactored the running of tests into an independent action.
73 lines
2.6 KiB
YAML
73 lines
2.6 KiB
YAML
# Action metadata: display name and one-line summary of this action.
name: Run Integration Tests
description: >-
  Run integration tests with configurable execution mode and provider settings
# Parameters supplied by the caller. All four are required; none has a default.
inputs:
  # JSON array of test-type names.
  test-types:
    required: true
    description: >-
      Test types to run (JSON array)
  stack-config:
    required: true
    description: >-
      Stack configuration: "ci-tests" or "server:ci-tests"
  provider:
    required: true
    description: >-
      Provider to use: "ollama" or "vllm"
  inference-mode:
    required: true
    description: >-
      Inference mode: "record" or "replay"
# Values exposed to the caller.
outputs:
  # A fixed glob pattern, not a computed path.
  logs-path:
    value: "*.log"
    description: Path to generated log files
# Single composite step: runs pytest once per requested test type and tees
# each run's output into pytest-<inference-mode>-<test-type>.log in the
# working directory. The step stops at the first test type whose run fails.
runs:
  using: 'composite'
  steps:
    - name: Run Integration Tests
      env:
        LLAMA_STACK_CLIENT_TIMEOUT: "300"
        LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
        LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
        # Inputs reach the script through the environment instead of being
        # spliced into the script text, so a quote or shell metacharacter in
        # a value cannot change which commands are executed.
        INPUT_STACK_CONFIG: ${{ inputs.stack-config }}
        INPUT_PROVIDER: ${{ inputs.provider }}
        INPUT_TEST_TYPES: ${{ inputs.test-types }}
      shell: bash
      run: |
        # Make a pipeline's status reflect pytest rather than the trailing tee.
        set -o pipefail

        stack_config="$INPUT_STACK_CONFIG"
        provider="$INPUT_PROVIDER"
        inference_mode="$LLAMA_STACK_TEST_INFERENCE_MODE"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [ "$provider" == "ollama" ]; then
          export OLLAMA_URL="http://0.0.0.0:11434"
          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
          export SAFETY_MODEL="ollama/llama-guard3:1b"
          EXTRA_PARAMS=(--safety-shield=llama-guard)
        else
          # Every provider other than "ollama" receives the vLLM settings.
          export VLLM_URL="http://localhost:8000/v1"
          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
          EXTRA_PARAMS=()
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        TEST_TYPES="$INPUT_TEST_TYPES"
        echo "Test types to run: $TEST_TYPES"

        # Parse the JSON array before looping. A jq failure inside the loop
        # header would leave the loop with zero iterations and let the step
        # succeed without running a single test.
        if ! test_type_list=$(jq -r '.[]' <<<"$TEST_TYPES"); then
          echo "::error::test-types could not be parsed as a JSON array: $TEST_TYPES"
          exit 1
        fi
        if [ -z "$test_type_list" ]; then
          echo "::warning::test-types is empty; no tests will be run"
        fi

        # Unquoted on purpose: jq prints one test type per line and word
        # splitting turns that output into the loop items.
        for test_type in $test_type_list; do
          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [ "$provider" == "vllm" ]; then
            if [ "$test_type" == "safety" ] || [ "$test_type" == "post_training" ] || [ "$test_type" == "tool_runtime" ]; then
              continue
            fi
          fi

          echo "=== Running tests for: $test_type ==="

          if uv run pytest -s -v "tests/integration/$test_type" --stack-config="${stack_config}" \
            -k "not( ${EXCLUDE_TESTS} )" \
            --text-model="$TEXT_MODEL" \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes "${EXTRA_PARAMS[@]}" \
            --capture=tee-sys | tee "pytest-${inference_mode}-$test_type.log"; then
            echo "✅ Tests completed for $test_type"
          else
            echo "❌ Tests failed for $test_type"
            exit 1
          fi
        done