mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-12 04:50:39 +00:00)
test: use llama stack build when starting server (#2999)
# What does this PR do?

This should be more robust, since the test is sometimes run without `llama stack build` having been run first.

## Test Plan

    OLLAMA_URL=http://localhost:11434 \
    LLAMA_STACK_TEST_INFERENCE_MODE=replay \
    LLAMA_STACK_TEST_RECORDING_DIR=tests/integration/recordings \
    LLAMA_STACK_CONFIG=server:starter \
    uv run --with pytest-repeat pytest tests/integration/telemetry \
      --text-model="ollama/llama3.2:3b-instruct-fp16" -vvs
This commit is contained in:

parent 0b08d64ddb
commit 194abe7734

1 changed file with 3 additions and 2 deletions
    @@ -6,6 +6,7 @@
     import inspect
     import os
    +import shlex
     import signal
     import socket
     import subprocess
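The new `shlex` import supports the change in the next hunk: the run command becomes a single string, and `shlex.split` tokenizes it into the argv-style list that `subprocess.Popen` expects when `shell=False`. A minimal sketch of that behavior, using the `starter` template from the test plan as the example value:

```python
import shlex

# shlex.split tokenizes a command string the way a POSIX shell would,
# respecting quoting, which is the form subprocess.Popen expects.
cmd = "uv run --with llama-stack llama stack build --template starter --image-type venv --run"
print(shlex.split(cmd))
# ['uv', 'run', '--with', 'llama-stack', 'llama', 'stack', 'build',
#  '--template', 'starter', '--image-type', 'venv', '--run']
```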
    @@ -38,10 +39,10 @@ def is_port_available(port: int, host: str = "localhost") -> bool:
    
     def start_llama_stack_server(config_name: str) -> subprocess.Popen:
         """Start a llama stack server with the given config."""
    -    cmd = ["llama", "stack", "run", config_name]
    +    cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
         devnull = open(os.devnull, "w")
         process = subprocess.Popen(
    -        cmd,
    +        shlex.split(cmd),
             stdout=devnull,  # redirect stdout to devnull to prevent deadlock
             stderr=subprocess.PIPE,  # keep stderr to see errors
             text=True,
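Putting the hunk together, the patched helper plausibly reads as below. This is a sketch assuming context the diff truncates: the closing of the `subprocess.Popen(...)` call and the return of the `process` handle are not shown in the hunk.

```python
import os
import shlex
import subprocess


def start_llama_stack_server(config_name: str) -> subprocess.Popen:
    """Start a llama stack server with the given config."""
    # Build the distribution (if needed) and run it in one step, so the
    # test no longer depends on `llama stack build` having been run first.
    cmd = f"uv run --with llama-stack llama stack build --template {config_name} --image-type venv --run"
    devnull = open(os.devnull, "w")
    process = subprocess.Popen(
        shlex.split(cmd),
        stdout=devnull,  # redirect stdout to devnull to prevent deadlock
        stderr=subprocess.PIPE,  # keep stderr to see errors
        text=True,
    )
    return process  # assumed: the truncated hunk hands back the Popen handle
```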