test: use llama stack build when starting server (#2999)

# What does this PR do?
This should be more robust, as sometimes it's run without running build
first.

## Test Plan
OLLAMA_URL=http://localhost:11434 LLAMA_STACK_TEST_INFERENCE_MODE=replay
LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings
LLAMA_STACK_CONFIG=server:starter uv run --with pytest-repeat pytest
tests/integration/telemetry
--text-model="ollama/llama3.2:3b-instruct-fp16" -vvs
This commit is contained in:
ehhuang 2025-07-31 21:09:14 -07:00 committed by GitHub
parent 0b08d64ddb
commit 194abe7734
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -6,6 +6,7 @@
import inspect import inspect
import os import os
import shlex
import signal import signal
import socket import socket
import subprocess import subprocess
@ -38,10 +39,10 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
def start_llama_stack_server(config_name: str) -> subprocess.Popen: def start_llama_stack_server(config_name: str) -> subprocess.Popen:
"""Start a llama stack server with the given config.""" """Start a llama stack server with the given config."""
cmd = ["llama", "stack", "run", config_name] cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
devnull = open(os.devnull, "w") devnull = open(os.devnull, "w")
process = subprocess.Popen( process = subprocess.Popen(
cmd, shlex.split(cmd),
stdout=devnull, # redirect stdout to devnull to prevent deadlock stdout=devnull, # redirect stdout to devnull to prevent deadlock
stderr=subprocess.PIPE, # keep stderr to see errors stderr=subprocess.PIPE, # keep stderr to see errors
text=True, text=True,