From 194abe773422cafd718c812eee64ac18c2d5bbe0 Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Thu, 31 Jul 2025 21:09:14 -0700
Subject: [PATCH] test: use llama stack build when starting server (#2999)

# What does this PR do?

This should be more robust, as the test server is sometimes started without `llama stack build` having been run first.

## Test Plan

OLLAMA_URL=http://localhost:11434 LLAMA_STACK_TEST_INFERENCE_MODE=replay LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings LLAMA_STACK_CONFIG=server:starter uv run --with pytest-repeat pytest tests/integration/telemetry --text-model="ollama/llama3.2:3b-instruct-fp16" -vvs
---
 tests/integration/fixtures/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index bdbe0d66f..37c7474a0 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -6,6 +6,7 @@
 
 import inspect
 import os
+import shlex
 import signal
 import socket
 import subprocess
@@ -38,10 +39,10 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
 
 def start_llama_stack_server(config_name: str) -> subprocess.Popen:
     """Start a llama stack server with the given config."""
-    cmd = ["llama", "stack", "run", config_name]
+    cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
     devnull = open(os.devnull, "w")
     process = subprocess.Popen(
-        cmd,
+        shlex.split(cmd),
        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
        stderr=subprocess.PIPE,  # keep stderr to see errors
        text=True,
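
Note on the `shlex` import added above: the command was changed from an argv list to a single string, and `shlex.split()` converts it back into the list that `subprocess.Popen` expects when no shell is used, honoring shell-style quoting along the way. A minimal sketch of the behavior (the `starter` template name is just an example value for `config_name`):

```python
import shlex

cmd = "uv run --with llama-stack llama stack build --template starter --image-type venv --run"
print(shlex.split(cmd))
# ['uv', 'run', '--with', 'llama-stack', 'llama', 'stack', 'build',
#  '--template', 'starter', '--image-type', 'venv', '--run']

# Quoted arguments stay intact as single tokens, unlike a naive str.split():
print(shlex.split('pytest --text-model="ollama/llama3.2:3b-instruct-fp16"'))
# ['pytest', '--text-model=ollama/llama3.2:3b-instruct-fp16']
```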