mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
Merge 746e9c91a9
into 9f6c658f2a
This commit is contained in:
commit
cb191bf36d
6 changed files with 34 additions and 18 deletions
|
@ -68,7 +68,8 @@ runs:
|
||||||
echo "New recordings detected, committing and pushing"
|
echo "New recordings detected, committing and pushing"
|
||||||
git add tests/integration/recordings/
|
git add tests/integration/recordings/
|
||||||
|
|
||||||
git commit -m "Recordings update from CI (suite: ${{ inputs.suite }})"
|
git commit -m "Recordings update from CI (setup: ${{ inputs.setup }}, suite: ${{ inputs.suite }})"
|
||||||
|
|
||||||
git fetch origin ${{ github.ref_name }}
|
git fetch origin ${{ github.ref_name }}
|
||||||
git rebase origin/${{ github.ref_name }}
|
git rebase origin/${{ github.ref_name }}
|
||||||
echo "Rebased successfully"
|
echo "Rebased successfully"
|
||||||
|
@ -82,7 +83,8 @@ runs:
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log || true
|
sudo docker logs ollama > ollama-${{ inputs.inference-mode }}.log 2>&1 || true
|
||||||
|
sudo docker logs vllm > vllm-${{ inputs.inference-mode }}.log 2>&1 || true
|
||||||
|
|
||||||
- name: Upload logs
|
- name: Upload logs
|
||||||
if: ${{ always() }}
|
if: ${{ always() }}
|
||||||
|
|
9
.github/actions/setup-vllm/action.yml
vendored
9
.github/actions/setup-vllm/action.yml
vendored
|
@ -11,13 +11,14 @@ runs:
|
||||||
--name vllm \
|
--name vllm \
|
||||||
-p 8000:8000 \
|
-p 8000:8000 \
|
||||||
--privileged=true \
|
--privileged=true \
|
||||||
quay.io/higginsd/vllm-cpu:65393ee064 \
|
quay.io/higginsd/vllm-cpu:65393ee064-qwen3 \
|
||||||
--host 0.0.0.0 \
|
--host 0.0.0.0 \
|
||||||
--port 8000 \
|
--port 8000 \
|
||||||
--enable-auto-tool-choice \
|
--enable-auto-tool-choice \
|
||||||
--tool-call-parser llama3_json \
|
--tool-call-parser hermes \
|
||||||
--model /root/.cache/Llama-3.2-1B-Instruct \
|
--model /root/.cache/Qwen3-0.6B \
|
||||||
--served-model-name meta-llama/Llama-3.2-1B-Instruct
|
--served-model-name Qwen/Qwen3-0.6B \
|
||||||
|
--max-model-len 8192
|
||||||
|
|
||||||
# Wait for vllm to be ready
|
# Wait for vllm to be ready
|
||||||
echo "Waiting for vllm to be ready..."
|
echo "Waiting for vllm to be ready..."
|
||||||
|
|
23
.github/workflows/integration-tests.yml
vendored
23
.github/workflows/integration-tests.yml
vendored
|
@ -21,7 +21,6 @@ on:
|
||||||
schedule:
|
schedule:
|
||||||
# If changing the cron schedule, update the provider in the test-matrix job
|
# If changing the cron schedule, update the provider in the test-matrix job
|
||||||
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
- cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
|
||||||
- cron: '1 0 * * 0' # (test vllm) Weekly on Sunday at 1 AM UTC
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
test-all-client-versions:
|
test-all-client-versions:
|
||||||
|
@ -48,24 +47,38 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
client-type: [library, server]
|
client-type: [library, server]
|
||||||
# Use vllm on weekly schedule, otherwise use test-setup input (defaults to ollama)
|
|
||||||
setup: ${{ (github.event.schedule == '1 0 * * 0') && fromJSON('["vllm"]') || fromJSON(format('["{0}"]', github.event.inputs.test-setup || 'ollama')) }}
|
|
||||||
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
|
||||||
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
|
||||||
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
|
||||||
|
setup: [ollama, vllm]
|
||||||
suite: [base, vision]
|
suite: [base, vision]
|
||||||
|
exclude:
|
||||||
|
- setup: vllm
|
||||||
|
suite: vision
|
||||||
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||||
|
|
||||||
|
# This could in theory be done in the matrix, but it was getting too complex
|
||||||
|
- name: Update Matrix
|
||||||
|
id: update-matrix
|
||||||
|
run: |
|
||||||
|
REWRITTEN_SUITE="${{ matrix.suite }}"
|
||||||
|
if [[ "${{ matrix.setup }}" == "vllm" && "${{ matrix.suite }}" == "base" ]]; then
|
||||||
|
REWRITTEN_SUITE="base-vllm-subset"
|
||||||
|
fi
|
||||||
|
echo "suite=${REWRITTEN_SUITE}" >> $GITHUB_OUTPUT
|
||||||
|
echo "Rewritten suite: ${REWRITTEN_SUITE}"
|
||||||
|
|
||||||
- name: Setup test environment
|
- name: Setup test environment
|
||||||
uses: ./.github/actions/setup-test-environment
|
uses: ./.github/actions/setup-test-environment
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
client-version: ${{ matrix.client-version }}
|
client-version: ${{ matrix.client-version }}
|
||||||
setup: ${{ matrix.setup }}
|
setup: ${{ matrix.setup }}
|
||||||
suite: ${{ matrix.suite }}
|
suite: ${{ steps.update-matrix.outputs.suite }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
|
@ -74,4 +87,4 @@ jobs:
|
||||||
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || 'server:ci-tests' }}
|
||||||
setup: ${{ matrix.setup }}
|
setup: ${{ matrix.setup }}
|
||||||
inference-mode: 'replay'
|
inference-mode: 'replay'
|
||||||
suite: ${{ matrix.suite }}
|
suite: ${{ steps.update-matrix.outputs.suite }}
|
||||||
|
|
|
@ -212,11 +212,6 @@ fi
|
||||||
echo "=== Running Integration Tests ==="
|
echo "=== Running Integration Tests ==="
|
||||||
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
EXCLUDE_TESTS="builtin_tool or safety_with_image or code_interpreter or test_rag"
|
||||||
|
|
||||||
# Additional exclusions for vllm setup
|
|
||||||
if [[ "$TEST_SETUP" == "vllm" ]]; then
|
|
||||||
EXCLUDE_TESTS="${EXCLUDE_TESTS} or test_inference_store_tool_calls"
|
|
||||||
fi
|
|
||||||
|
|
||||||
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
|
PYTEST_PATTERN="not( $EXCLUDE_TESTS )"
|
||||||
if [[ -n "$TEST_PATTERN" ]]; then
|
if [[ -n "$TEST_PATTERN" ]]; then
|
||||||
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
|
PYTEST_PATTERN="${PYTEST_PATTERN} and $TEST_PATTERN"
|
||||||
|
|
|
@ -78,7 +78,7 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
|
||||||
"VLLM_URL": "http://localhost:8000/v1",
|
"VLLM_URL": "http://localhost:8000/v1",
|
||||||
},
|
},
|
||||||
defaults={
|
defaults={
|
||||||
"text_model": "vllm/meta-llama/Llama-3.2-1B-Instruct",
|
"text_model": "vllm/Qwen/Qwen3-0.6B",
|
||||||
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
|
"embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
@ -147,6 +147,11 @@ SUITE_DEFINITIONS: dict[str, Suite] = {
|
||||||
roots=base_roots,
|
roots=base_roots,
|
||||||
default_setup="ollama",
|
default_setup="ollama",
|
||||||
),
|
),
|
||||||
|
"base-vllm-subset": Suite(
|
||||||
|
name="base-vllm-subset",
|
||||||
|
roots=["tests/integration/inference"],
|
||||||
|
default_setup="vllm",
|
||||||
|
),
|
||||||
"responses": Suite(
|
"responses": Suite(
|
||||||
name="responses",
|
name="responses",
|
||||||
roots=["tests/integration/responses"],
|
roots=["tests/integration/responses"],
|
||||||
|
|
|
@ -259,7 +259,7 @@
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
|
"content": "You are a helpful assistant. Michael Jordan was born in 1963. His first name is \"Michael\", He played basketball for the Chicago Bulls for 15 seasons and was drafted in 1984"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue