update tests slightly, other fixes

2025-12-22 18:56:24 +00:00 · 2025-07-30 17:10:01 -07:00 · 2025-07-30 17:10:01 -07:00 · e9f8458770
commit e9f8458770
parent 0b02af792d
5 changed files with 24 additions and 32 deletions
--- a/.github/actions/run-integration-tests/action.yml
+++ b/.github/actions/run-integration-tests/action.yml
@ -30,7 +30,6 @@ runs:
    - name: Run Integration Tests
      env:
        LLAMA_STACK_CLIENT_TIMEOUT: "300"
-        LLAMA_STACK_TEST_RECORDING_DIR: "tests/integration/recordings"
        LLAMA_STACK_TEST_INFERENCE_MODE: ${{ inputs.inference-mode }}
      shell: bash
      run: |
@ -51,6 +50,7 @@ runs:
        fi

        if [ "${{ inputs.run-vision-tests }}" == "true" ]; then
+          export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings/vision"
          if uv run pytest -s -v tests/integration/inference/test_vision_inference.py --stack-config=${stack_config} \
            -k "not( ${EXCLUDE_TESTS} )" \
            --vision-model=ollama/llama3.2-vision:11b \
@ -66,6 +66,8 @@ runs:
          exit 0
        fi

+        # Run non-vision tests
+        export LLAMA_STACK_TEST_RECORDING_DIR="tests/integration/recordings"
        TEST_TYPES='${{ inputs.test-types }}'
        echo "Test types to run: $TEST_TYPES"

--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@ -128,11 +128,10 @@ jobs:

      - name: Commit and push recordings
        run: |
-          if ! git diff --quiet tests/integration/recordings/; then
-            echo "Committing recordings"
+          if [ -n "$(git status --porcelain tests/integration/recordings/)" ]; then  # non-empty output = new or modified recordings
+            echo "New recordings detected, committing and pushing"
            git add tests/integration/recordings/
            git commit -m "Update recordings"
-            echo "Pushing all recording commits to PR"
            git push origin HEAD:${{ github.head_ref }}
          else
            echo "No recording changes"
--- a/.github/workflows/integration-vision-tests.yml
+++ b/.github/workflows/integration-vision-tests.yml
@ -25,7 +25,14 @@ on:
        default: false

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}-${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay' }}
+  group: >-
+    ${{ github.workflow }}-${{ github.ref }}-${{
+    (github.event.action == 'opened' ||
+     github.event.action == 'synchronize' ||
+     (github.event.action == 'labeled' && contains(github.event.pull_request.labels.*.name, 're-record-tests'))) &&
+    (contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'rerecord' || 'replay') ||
+    'no-run'
+    }}
  cancel-in-progress: true

 jobs:
@ -100,11 +107,10 @@ jobs:

      - name: Commit and push recordings
        run: |
-          if ! git diff --quiet tests/integration/recordings/; then
-            echo "Committing recordings"
+          if [ -n "$(git status --porcelain tests/integration/recordings/)" ]; then  # non-empty output = new or modified recordings
+            echo "New recordings detected, committing and pushing"
            git add tests/integration/recordings/
            git commit -m "Update recordings"
-            echo "Pushing all recording commits to PR"
            git push origin HEAD:${{ github.head_ref }}
          else
            echo "No recording changes"
--- a/tests/integration/inference/test_vision_inference.py
+++ b/tests/integration/inference/test_vision_inference.py
@ -25,12 +25,6 @@ def base64_image_data(image_path):
    return base64.b64encode(image_path.read_bytes()).decode("utf-8")


-@pytest.fixture
-def base64_image_url(base64_image_data, image_path):
-    # suffix includes the ., so we remove it
-    return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
-
-
 def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
    message = {
        "role": "user",
@ -78,7 +72,9 @@ def multi_image_data():
 def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
    supported_models = ["llama-4", "gpt-4o", "llama4"]
    if not any(model in vision_model_id.lower() for model in supported_models):
-        pytest.skip(f"Skip for non-supported model: {vision_model_id}")
+        pytest.skip(
+            f"Skip since multi-image tests are only supported for {supported_models}, not for {vision_model_id}"
+        )

    messages = [
        {
@ -183,24 +179,13 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
    assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})


-@pytest.mark.parametrize("type_", ["url", "data"])
-def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data, base64_image_url, type_):
+def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
    image_spec = {
-        "url": {
-            "type": "image",
-            "image": {
-                "url": {
-                    "uri": base64_image_url,
-                },
-            },
+        "type": "image",
+        "image": {
+            "data": base64_image_data,
        },
-        "data": {
-            "type": "image",
-            "image": {
-                "data": base64_image_data,
-            },
-        },
-    }[type_]
+    }

    message = {
        "role": "user",