mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-12 04:50:39 +00:00)
test: use llama stack build when starting server (#2999)
# What does this PR do?

This should be more robust, since the test is sometimes run without `llama stack build` having been run first.

## Test Plan

    OLLAMA_URL=http://localhost:11434 \
    LLAMA_STACK_TEST_INFERENCE_MODE=replay \
    LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
    LLAMA_STACK_CONFIG=server:starter \
    uv run --with pytest-repeat pytest tests/integration/telemetry \
      --text-model="ollama/llama3.2:3b-instruct-fp16" -vvs
This commit is contained in:

parent 0b08d64ddb
commit 194abe7734

1 changed file with 3 additions and 2 deletions
    @@ -6,6 +6,7 @@
     import inspect
     import os
    +import shlex
     import signal
     import socket
     import subprocess
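The new `shlex` import supports the change in the next hunk: the run command becomes a single string, and `shlex.split` tokenizes it into the argv-style list that `subprocess.Popen` expects when `shell=False`. A minimal sketch of that behavior, using the `starter` template from the test plan as the example value:

```python
import shlex

# shlex.split tokenizes a command string the way a POSIX shell would,
# respecting quoting, which is the form subprocess.Popen expects.
cmd = "uv run --with llama-stack llama stack build --template starter --image-type venv --run"
print(shlex.split(cmd))
# ['uv', 'run', '--with', 'llama-stack', 'llama', 'stack', 'build',
#  '--template', 'starter', '--image-type', 'venv', '--run']
```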
    @@ -38,10 +39,10 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
    
     def start_llama_stack_server(config_name: str) -> subprocess.Popen:
         """Start a llama stack server with the given config."""
    -    cmd = ["llama", "stack", "run", config_name]
    +    cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
         devnull = open(os.devnull, "w")
         process = subprocess.Popen(
    -        cmd,
    +        shlex.split(cmd),
             stdout=devnull,  # redirect stdout to devnull to prevent deadlock
             stderr=subprocess.PIPE,  # keep stderr to see errors
             text=True,
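Putting the hunk together, the patched helper plausibly reads as below. This is a sketch assuming context the diff truncates: the closing of the `subprocess.Popen(...)` call and the return of the `process` handle are not shown in the hunk.

```python
import os
import shlex
import subprocess


def start_llama_stack_server(config_name: str) -> subprocess.Popen:
    """Start a llama stack server with the given config."""
    # Build the distribution (if needed) and run it in one step, so the
    # test no longer depends on `llama stack build` having been run first.
    cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
    devnull = open(os.devnull, "w")
    process = subprocess.Popen(
        shlex.split(cmd),
        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
        stderr=subprocess.PIPE,  # keep stderr to see errors
        text=True,
    )
    return process  # assumed: the truncated hunk hands back the Popen handle
```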