mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 22:18:00 +00:00
feat(ci): keep only one re-recording job because independent recordings will conflict (#2956)
A couple of important updates:
- When recording tests, we cannot generate a matrix, because all the independent recordings will conflict.
- In fact, we no longer need a matrix over test types at all: the tests themselves are very fast, and the overhead of `llama stack build`, setting up `uv`, etc. is much larger than the test runs.
- Refactored the running of tests into an independent action.
This commit is contained in:
parent
b237df8f18
commit
f6afb3c26b
2 changed files with 158 additions and 103 deletions
73
.github/actions/run-integration-tests/action.yml
vendored
Normal file
73
.github/actions/run-integration-tests/action.yml
vendored
Normal file
|
@ -0,0 +1,73 @@
|
|||
# Composite action that runs the integration test suites, one pytest
# invocation per requested test type, against a given stack configuration
# and inference provider. Each invocation's output is mirrored to
# pytest-<inference-mode>-<test-type>.log in the working directory.
name: 'Run Integration Tests'
description: 'Run integration tests with configurable execution mode and provider settings'

inputs:
  test-types:
    description: 'Test types to run (JSON array)'
    required: true
  stack-config:
    description: 'Stack configuration: "ci-tests" or "server:ci-tests"'
    required: true
  provider:
    description: 'Provider to use: "ollama" or "vllm"'
    required: true
  inference-mode:
    description: 'Inference mode: "record" or "replay"'
    required: true

outputs:
  logs-path:
    description: 'Path to generated log files'
    # Glob matching the per-test-type log files written by the step below.
    value: '*.log'

runs:
  using: 'composite'
  steps:
    - name: Run Integration Tests
      env:
        LLAMA_STACK_CLIENT_TIMEOUT: "300"
        LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
        LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
        # Inputs are handed to the script through the environment rather than
        # being spliced into the script text, so a quote or shell
        # metacharacter in an input cannot alter the script that gets run.
        ACTION_INPUT_TEST_TYPES: ${{ inputs.test-types }}
        ACTION_INPUT_STACK_CONFIG: ${{ inputs.stack-config }}
        ACTION_INPUT_PROVIDER: ${{ inputs.provider }}
      shell: bash
      run: |
        # The pass/fail check on the `pytest | tee` pipeline below needs the
        # pipeline to report pytest's status, not tee's. Set it explicitly
        # instead of depending on the runner's default bash flags.
        set -o pipefail

        stack_config="${ACTION_INPUT_STACK_CONFIG}"
        provider="${ACTION_INPUT_PROVIDER}"
        EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"

        # Configure provider-specific settings
        if [[ "${provider}" == "ollama" ]]; then
          export OLLAMA_URL="http://0.0.0.0:11434"
          export TEXT_MODEL="ollama/llama3.2:3b-instruct-fp16"
          export SAFETY_MODEL="ollama/llama-guard3:1b"
          EXTRA_PARAMS=(--safety-shield=llama-guard)
        else
          export VLLM_URL="http://localhost:8000/v1"
          export TEXT_MODEL="vllm/meta-llama/Llama-3.2-1B-Instruct"
          EXTRA_PARAMS=()
          EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
        fi

        TEST_TYPES="${ACTION_INPUT_TEST_TYPES}"
        echo "Test types to run: $TEST_TYPES"

        # Parse the JSON array once, up front. A malformed value must fail the
        # step; otherwise the loop would run zero times and report success
        # without having executed any test.
        if ! test_type_lines="$(jq -r '.[]' <<<"${TEST_TYPES}")"; then
          echo "test-types input is not a valid JSON array: ${TEST_TYPES}" >&2
          exit 1
        fi
        # Materialize into an array so the loop does not hold stdin, which
        # pytest would otherwise inherit.
        mapfile -t test_types <<<"${test_type_lines}"

        for test_type in "${test_types[@]}"; do
          # An empty JSON array yields a single empty line; nothing to run.
          [[ -n "${test_type}" ]] || continue

          # if provider is vllm, exclude the following tests: (safety, post_training, tool_runtime)
          if [[ "${provider}" == "vllm" ]]; then
            if [[ "$test_type" == "safety" || "$test_type" == "post_training" || "$test_type" == "tool_runtime" ]]; then
              continue
            fi
          fi

          echo "=== Running tests for: $test_type ==="

          # Output is mirrored to a per-type log file; the first failing test
          # type aborts the step.
          if uv run pytest -s -v "tests/integration/${test_type}" --stack-config="${stack_config}" \
            -k "not( ${EXCLUDE_TESTS} )" \
            --text-model="${TEXT_MODEL}" \
            --embedding-model=sentence-transformers/all-MiniLM-L6-v2 \
            --color=yes "${EXTRA_PARAMS[@]}" \
            --capture=tee-sys | tee "pytest-${LLAMA_STACK_TEST_INFERENCE_MODE}-${test_type}.log"; then
            echo "✅ Tests completed for $test_type"
          else
            echo "❌ Tests failed for $test_type"
            exit 1
          fi
        done
|
Loading…
Add table
Add a link
Reference in a new issue