Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
# What does this PR do?

Adds an inline HF SFTTrainer provider. Alongside torchtune, this is a very popular option for running training jobs. The config allows a user to specify key fields such as a model, chat_template, device, etc. The provider comes with one recipe, `finetune_single_device`, which works both with and without LoRA. Any model that is a valid HF identifier can be given, and the model will be pulled. This has been tested so far with CPU and MPS device types, but should be compatible with CUDA out of the box.

The provider processes the given dataset into the proper format, establishes the various steps per epoch, steps per save, and steps per eval, sets a sane SFTConfig, and runs n_epochs of training. If checkpoint_dir is None, no model is saved. If there is a checkpoint dir, a model is saved every `save_steps` and at the end of training.

## Test Plan

Re-enabled the post_training integration test suite with a single test that loads the simpleqa dataset (https://huggingface.co/datasets/llamastack/simpleqa) and a tiny Granite model (https://huggingface.co/ibm-granite/granite-3.3-2b-instruct). The test now uses the llama stack client and the proper post_training API, and runs one step with a batch_size of 1. This test runs on CPU on the Ubuntu runner, so it needs to be a small batch and a single step.

[//]: # (## Documentation)

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
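For context, the call shape the test plan describes (llama stack client, post_training API, one step at batch_size 1) would look roughly like the sketch below. This is a hedged illustration, not the test itself: the `supervised_fine_tune` parameter names and the training/data config fields are assumptions based on the PR description, and the base URL, job id, and dataset id are placeholders.

```python
# Hypothetical sketch: submit a tiny SFT job through the llama stack client's
# post_training API, mirroring the integration test described above.
# Field names below are assumptions, not copied from the test source.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

job = client.post_training.supervised_fine_tune(
    job_uuid="hf-sft-smoke-test",                    # placeholder job id
    model="ibm-granite/granite-3.3-2b-instruct",     # tiny Granite model from the test plan
    checkpoint_dir=None,                             # per the PR: no checkpoint dir => nothing is saved
    algorithm_config=None,                           # or a LoRA config to exercise the LoRA path
    training_config={
        "n_epochs": 1,
        "max_steps_per_epoch": 1,                    # a single step, as in the CI test
        "gradient_accumulation_steps": 1,
        "data_config": {
            "dataset_id": "simpleqa",                # placeholder id for the simpleqa dataset
            "batch_size": 1,                         # CPU-only Ubuntu runner => keep it tiny
            "shuffle": False,
            "data_format": "instruct",
        },
    },
    hyperparam_search_config={},
    logger_config={},
)
print(job)
```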
126 lines
4.3 KiB
YAML
name: Integration Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
    paths:
      - 'llama_stack/**'
      - 'tests/integration/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'requirements.txt'
      - '.github/workflows/integration-tests.yml' # This workflow

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-matrix:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Listing tests manually since some of them currently fail
        # TODO: generate matrix list from tests/integration when fixed
        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers]
        client-type: [library, http]
      fail-fast: false # we want to run all tests regardless of failure
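    # Note: client-type controls how the tests reach the stack. With "library" the
    # suite runs in-process via --stack-config=ollama; with "http" it targets the
    # server started below at http://localhost:8321.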

    steps:
      - name: Checkout repository
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

      - name: Install uv
        uses: astral-sh/setup-uv@c7f87aa956e4c323abf06d5dec078e358f6b4d04 # v6.0.0
        with:
          python-version: "3.10"
          activate-environment: true

      - name: Setup ollama
        uses: ./.github/actions/setup-ollama

      - name: Set Up Environment and Install Dependencies
        run: |
          uv sync --extra dev --extra test
          uv pip install ollama faiss-cpu
          # always test against the latest version of the client
          # TODO: this is not necessarily a good idea. we need to test against both published and latest
          # to find out backwards compatibility issues.
          uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
          uv pip install -e .
          llama stack build --template ollama --image-type venv

      - name: Start Llama Stack server in background
        if: matrix.client-type == 'http'
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          source .venv/bin/activate
          LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &

      - name: Wait for Llama Stack server to be ready
        if: matrix.client-type == 'http'
        run: |
          echo "Waiting for Llama Stack server..."
          for i in {1..30}; do
            if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
              echo "Llama Stack server is up!"
              exit 0
            fi
            sleep 1
          done
          echo "Llama Stack server failed to start"
          cat server.log
          exit 1

      - name: Verify Ollama status is OK
        if: matrix.client-type == 'http'
        run: |
          echo "Verifying Ollama status..."
          ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama | jq --raw-output .health.status)
          echo "Ollama status: $ollama_status"
          if [ "$ollama_status" != "OK" ]; then
            echo "Ollama health check failed"
            exit 1
          fi

      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Run Integration Tests
        env:
          INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
        run: |
          if [ "${{ matrix.client-type }}" == "library" ]; then
            stack_config="ollama"
          else
            stack_config="http://localhost:8321"
          fi
          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
            --embedding-model=all-MiniLM-L6-v2

      - name: Check Storage and Memory Available After Tests
        if: ${{ always() }}
        run: |
          free -h
          df -h

      - name: Write ollama logs to file
        if: ${{ always() }}
        run: |
          sudo journalctl -u ollama.service > ollama.log

      - name: Upload all logs to artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
          name: logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.client-type }}-${{ matrix.test-type }}
          path: |
            *.log
          retention-days: 1
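For local debugging outside CI, the two readiness probes above (the `/v1/health` poll and the ollama provider check at `/v1/providers/ollama`) can be reproduced with a short script. A rough sketch, assuming the server listens on the default port 8321 and `requests` is installed:

```python
# Rough equivalent of the workflow's curl/jq health probes; the endpoints are the
# ones used above, everything else (timeouts, the requests library) is an assumption.
import sys
import time

import requests

BASE = "http://localhost:8321"

# Poll /v1/health for up to ~30 seconds, like the shell loop in the workflow.
for _ in range(30):
    try:
        if requests.get(f"{BASE}/v1/health", timeout=2).ok:
            print("Llama Stack server is up!")
            break
    except requests.exceptions.ConnectionError:
        pass
    time.sleep(1)
else:
    sys.exit("Llama Stack server failed to start")

# Then check the ollama provider's reported health (.health.status in the jq step).
status = requests.get(f"{BASE}/v1/providers/ollama", timeout=5).json().get("health", {}).get("status")
print(f"Ollama status: {status}")
if status != "OK":
    sys.exit("Ollama health check failed")
```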