fix(ci, tests): ensure uv environments in CI are kosher, record tests (#3193)

I started this PR trying to unbreak a newly broken test `test_agent_name`. This test was broken all along but did not show up because during testing we were pulling the "non-updated" llama stack client. See this comment: https://github.com/llamastack/llama-stack/pull/3119#discussion_r2270988205 While fixing this, I encountered a large amount of badness in our CI workflow definitions. - We weren't passing `LLAMA_STACK_DIR` or `LLAMA_STACK_CLIENT_DIR` overrides to `llama stack build` at all in some cases. - Even when we did, we used `uv run` liberally. The first thing `uv run` does is "syncs" the project environment. This means, it is going to undo any mutations we might have done ourselves. But we make many mutations in our CI runners to these environments. The most important of which is why `llama stack build` where we install distro dependencies. As a result, when you tried to run the integration tests, you would see old, strange versions. ## Test Plan Re-record using: ``` sh scripts/integration-tests.sh --stack-config ci-tests \ --provider ollama --test-pattern test_agent_name --inference-mode record ``` Then re-run with `--inference-mode replay`. But: Eventually, this test turned out to be quite flaky for telemetry reasons. I haven't investigated it for now and just disabled it sadly since we have a release to push out.
2025-12-03 18:00:36 +00:00 · 2025-08-18 17:02:24 -07:00 · 2025-08-18 17:02:24 -07:00 · eb07a0f86a
commit eb07a0f86a
parent ac78e9f66a
14 changed files with 366 additions and 36 deletions
--- a/.github/actions/run-and-record-tests/action.yml
+++ b/.github/actions/run-and-record-tests/action.yml
@ -36,7 +36,7 @@ runs:
    - name: Run Integration Tests
      shell: bash
      run: |
-        ./scripts/integration-tests.sh \
+        uv run --no-sync ./scripts/integration-tests.sh \
          --stack-config '${{ inputs.stack-config }}' \
          --provider '${{ inputs.provider }}' \
          --test-subdirs '${{ inputs.test-subdirs }}' \
--- a/.github/actions/setup-runner/action.yml
+++ b/.github/actions/setup-runner/action.yml
@ -16,14 +16,16 @@ runs:
      uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
      with:
        python-version: ${{ inputs.python-version }}
        activate-environment: true
        version: 0.7.6
    - name: Install dependencies
      shell: bash
      run: |
        echo "Updating project dependencies via uv sync"
        uv sync --all-groups
-        uv pip install ollama faiss-cpu
+
        echo "Installing ad-hoc dependencies"
        uv pip install faiss-cpu
        # Install llama-stack-client-python based on the client-version input
        if [ "${{ inputs.client-version }}" = "latest" ]; then
@ -37,4 +39,5 @@ runs:
          exit 1
        fi
-        uv pip install -e .
+        echo "Installed llama packages"
        uv pip list | grep llama
--- a/.github/actions/setup-test-environment/action.yml
+++ b/.github/actions/setup-test-environment/action.yml
@ -42,7 +42,22 @@ runs:
    - name: Build Llama Stack
      shell: bash
      run: |
-        uv run llama stack build --template ci-tests --image-type venv
+        # Install llama-stack-client-python based on the client-version input
        if [ "${{ inputs.client-version }}" = "latest" ]; then
          echo "Installing latest llama-stack-client-python from main branch"
          export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
        elif [ "${{ inputs.client-version }}" = "published" ]; then
          echo "Installing published llama-stack-client-python from PyPI"
          unset LLAMA_STACK_CLIENT_DIR
        else
          echo "Invalid client-version: ${{ inputs.client-version }}"
          exit 1
        fi
        echo "Building Llama Stack"
        LLAMA_STACK_DIR=. \
          uv run --no-sync llama stack build --template ci-tests --image-type venv
    - name: Configure git for commits
      shell: bash
--- a/.github/workflows/install-script-ci.yml
+++ b/.github/workflows/install-script-ci.yml
@ -30,7 +30,8 @@ jobs:
      - name: Build a single provider
        run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template starter --image-type container --image-name test
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync \
            llama stack build --template starter --image-type container --image-name test
      - name: Run installer end-to-end
        run: |
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@ -144,7 +144,7 @@ jobs:
      - name: Build Llama Stack
        run: |
-          uv run llama stack build --template ci-tests --image-type venv
+          uv run --no-sync llama stack build --template ci-tests --image-type venv
      - name: Check Storage and Memory Available Before Tests
        if: ${{ always() }}
@ -167,7 +167,8 @@ jobs:
          ENABLE_WEAVIATE: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'true' || '' }}
          WEAVIATE_CLUSTER_URL: ${{ matrix.vector-io-provider == 'remote::weaviate' && 'localhost:8080' || '' }}
        run: |
-          uv run pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
+          uv run --no-sync \
            pytest -sv --stack-config="files=inline::localfs,inference=inline::sentence-transformers,vector_io=${{ matrix.vector-io-provider }}" \
            tests/integration/vector_io \
            --embedding-model inline::sentence-transformers/all-MiniLM-L6-v2
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@ -44,11 +44,11 @@ jobs:
      - name: Print distro dependencies
        run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml --print-deps-only
      - name: Build distro from config file
        run: |
-          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
+          USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run --no-sync llama stack build --config tests/external/build.yaml
      - name: Start Llama Stack server in background
        if: ${{ matrix.image-type }} == 'venv'
--- a/llama_stack/core/build_venv.sh
+++ b/llama_stack/core/build_venv.sh
@ -151,23 +151,37 @@ run() {
    fi
  else
    if [ -n "$LLAMA_STACK_DIR" ]; then
-      if [ ! -d "$LLAMA_STACK_DIR" ]; then
+      # only warn if DIR does not start with "git+"
      if [ ! -d "$LLAMA_STACK_DIR" ] && [[ "$LLAMA_STACK_DIR" != git+* ]]; then
        printf "${RED}Warning: LLAMA_STACK_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_DIR" >&2
        exit 1
      fi
      printf "Installing from LLAMA_STACK_DIR: %s\n"  "$LLAMA_STACK_DIR"
-      uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"
+      # editable only if LLAMA_STACK_DIR does not start with "git+"
      if [[ "$LLAMA_STACK_DIR" != git+* ]]; then
        EDITABLE="-e"
      else
        EDITABLE=""
      fi
      uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_DIR"
    else
      uv pip install --no-cache-dir llama-stack
    fi
    if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
-      if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then
+      # only warn if DIR does not start with "git+"
      if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ] && [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
        printf "${RED}Warning: LLAMA_STACK_CLIENT_DIR is set but directory does not exist: %s${NC}\n" "$LLAMA_STACK_CLIENT_DIR" >&2
        exit 1
      fi
      printf "Installing from LLAMA_STACK_CLIENT_DIR: %s\n" "$LLAMA_STACK_CLIENT_DIR"
-      uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"
+      # editable only if LLAMA_STACK_CLIENT_DIR does not start with "git+"
      if [[ "$LLAMA_STACK_CLIENT_DIR" != git+* ]]; then
        EDITABLE="-e"
      else
        EDITABLE=""
      fi
      uv pip install --no-cache-dir $EDITABLE "$LLAMA_STACK_CLIENT_DIR"
    fi
    printf "Installing pip dependencies\n"
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@ -261,7 +261,7 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
        else:
            raise RuntimeError(
                f"No recorded response found for request hash: {request_hash}\n"
-                f"Endpoint: {endpoint}\n"
+                f"Request: {method} {url} {body}\n"
                f"Model: {body.get('model', 'unknown')}\n"
                f"To record this response, run with LLAMA_STACK_INFERENCE_MODE=record"
            )
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -111,6 +111,9 @@ echo "Inference Mode: $INFERENCE_MODE"
 echo "Test Pattern: $TEST_PATTERN"
 echo ""
 echo "Checking llama packages"
 uv pip list | grep llama
 # Check storage and memory before tests
 echo "=== System Resources Before Tests ==="
 free -h 2>/dev/null || echo "free command not available"
--- a/tests/integration/agents/test_agents.py
+++ b/tests/integration/agents/test_agents.py
@ -133,24 +133,15 @@ def test_agent_simple(llama_stack_client, agent_config):
        assert "I can't" in logs_str
@pytest.mark.skip(reason="this test was disabled for a long time, and now has turned flaky")
 def test_agent_name(llama_stack_client, text_model_id):
    agent_name = f"test-agent-{uuid4()}"
-
+    agent = Agent(
-    try:
+        llama_stack_client,
-        agent = Agent(
+        model=text_model_id,
-            llama_stack_client,
+        instructions="You are a helpful assistant",
-            model=text_model_id,
+        name=agent_name,
-            instructions="You are a helpful assistant",
+    )
            name=agent_name,
        )
    except TypeError:
        agent = Agent(
            llama_stack_client,
            model=text_model_id,
            instructions="You are a helpful assistant",
        )
        return
    session_id = agent.create_session(f"test-session-{uuid4()}")
    agent.create_turn(
--- a/tests/integration/recordings/index.sqlite
+++ b/tests/integration/recordings/index.sqlite
--- a/tests/integration/recordings/responses/4a3a4447b16b.json
+++ b/tests/integration/recordings/responses/4a3a4447b16b.json
@ -14,7 +14,7 @@
        "models": [
          {
            "model": "nomic-embed-text:latest",
-            "modified_at": "2025-08-15T21:55:08.088554Z",
+            "modified_at": "2025-08-18T12:47:56.732989-07:00",
            "digest": "0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f",
            "size": 274302450,
            "details": {
@ -28,9 +28,41 @@
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama3.2-vision:11b",
            "modified_at": "2025-07-30T18:45:02.517873-07:00",
            "digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
            "size": 7816589186,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "mllama",
              "families": [
                "mllama"
              ],
              "parameter_size": "10.7B",
              "quantization_level": "Q4_K_M"
            }
          },
          {
            "model": "llama3.2-vision:latest",
            "modified_at": "2025-07-29T20:18:47.920468-07:00",
            "digest": "6f2f9757ae97e8a3f8ea33d6adb2b11d93d9a35bef277cd2c0b1b5af8e8d0b1e",
            "size": 7816589186,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "mllama",
              "families": [
                "mllama"
              ],
              "parameter_size": "10.7B",
              "quantization_level": "Q4_K_M"
            }
          },
          {
            "model": "llama-guard3:1b",
-            "modified_at": "2025-07-31T04:44:58Z",
+            "modified_at": "2025-07-25T14:39:44.978630-07:00",
            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
            "size": 1600181919,
            "details": {
@ -46,7 +78,7 @@
          },
          {
            "model": "all-minilm:l6-v2",
-            "modified_at": "2025-07-31T04:42:15Z",
+            "modified_at": "2025-07-24T15:15:11.129290-07:00",
            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
            "size": 45960996,
            "details": {
@ -60,9 +92,57 @@
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama3.2:1b",
            "modified_at": "2025-07-17T22:02:24.953208-07:00",
            "digest": "baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878",
            "size": 1321098329,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "1.2B",
              "quantization_level": "Q8_0"
            }
          },
          {
            "model": "all-minilm:latest",
            "modified_at": "2025-06-03T16:50:10.946583-07:00",
            "digest": "1b226e2802dbb772b5fc32a58f103ca1804ef7501331012de126ab22f67475ef",
            "size": 45960996,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "bert",
              "families": [
                "bert"
              ],
              "parameter_size": "23M",
              "quantization_level": "F16"
            }
          },
          {
            "model": "llama3.2:3b",
            "modified_at": "2025-05-01T11:15:23.797447-07:00",
            "digest": "a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72",
            "size": 2019393189,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "3.2B",
              "quantization_level": "Q4_K_M"
            }
          },
          {
            "model": "llama3.2:3b-instruct-fp16",
-            "modified_at": "2025-07-31T04:42:05Z",
+            "modified_at": "2025-04-30T15:33:48.939665-07:00",
            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
            "size": 6433703586,
            "details": {
--- a/tests/integration/recordings/responses/731824c54461.json
+++ b/tests/integration/recordings/responses/731824c54461.json
@ -0,0 +1,203 @@
 {
  "request": {
    "method": "POST",
    "url": "http://localhost:11434/api/generate",
    "headers": {},
    "body": {
      "model": "llama3.2:3b-instruct-fp16",
      "raw": true,
      "prompt": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGive me a sentence that contains the word: hello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
      "options": {
        "temperature": 0.0
      },
      "stream": true
    },
    "endpoint": "/api/generate",
    "model": "llama3.2:3b-instruct-fp16"
  },
  "response": {
    "body": [
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.267146Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "Hello",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.309006Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": ",",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.351179Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " how",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.393262Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " can",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.436079Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " I",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.478393Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " assist",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.520608Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " you",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.562885Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": " today",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.604683Z",
          "done": false,
          "done_reason": null,
          "total_duration": null,
          "load_duration": null,
          "prompt_eval_count": null,
          "prompt_eval_duration": null,
          "eval_count": null,
          "eval_duration": null,
          "response": "?",
          "thinking": null,
          "context": null
        }
      },
      {
        "__type__": "ollama._types.GenerateResponse",
        "__data__": {
          "model": "llama3.2:3b-instruct-fp16",
          "created_at": "2025-08-18T19:47:58.646586Z",
          "done": true,
          "done_reason": "stop",
          "total_duration": 1011323917,
          "load_duration": 76575458,
          "prompt_eval_count": 31,
          "prompt_eval_duration": 553259250,
          "eval_count": 10,
          "eval_duration": 380302792,
          "response": "",
          "thinking": null,
          "context": null
        }
      }
    ],
    "is_streaming": true
  }
 }
--- a/tests/integration/recordings/responses/d0ac68cbde69.json
+++ b/tests/integration/recordings/responses/d0ac68cbde69.json
@ -11,7 +11,26 @@
    "body": {
      "__type__": "ollama._types.ProcessResponse",
      "__data__": {
-        "models": []
+        "models": [
          {
            "model": "llama3.2:3b-instruct-fp16",
            "name": "llama3.2:3b-instruct-fp16",
            "digest": "195a8c01d91ec3cb1e0aad4624a51f2602c51fa7d96110f8ab5a20c84081804d",
            "expires_at": "2025-08-18T13:47:44.262256-07:00",
            "size": 7919570944,
            "size_vram": 7919570944,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "3.2B",
              "quantization_level": "F16"
            }
          }
        ]
      }
    },
    "is_streaming": false