mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-24 21:29:53 +00:00
# What does this PR do? A PTY is unnecessary for interactive mode since `subprocess.run()` already inherits the calling terminal’s stdin, stdout, and stderr, allowing natural interaction. Using a PTY can introduce unwanted side effects like buffering issues and inconsistent signal handling. Standard input/output is sufficient for most interactive programs. This commit simplifies the command execution by: 1. Removing PTY-based execution in favor of direct subprocess handling 2. Consolidating command execution into a single run_command function 3. Improving error handling with specific subprocess error types 4. Adding proper type hints and documentation 5. Maintaining Ctrl+C handling for graceful interruption ## Test Plan ``` llama stack run ``` Signed-off-by: Sébastien Han <seb@redhat.com>
79 lines
2.2 KiB
YAML
79 lines
2.2 KiB
YAML
name: Integration tests
|
|
|
|
on:
|
|
pull_request:
|
|
push:
|
|
branches: [main]
|
|
|
|
jobs:
|
|
ollama:
|
|
runs-on: ubuntu-latest
|
|
|
|
steps:
|
|
- name: Checkout repository
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Install uv
|
|
uses: astral-sh/setup-uv@v5
|
|
with:
|
|
python-version: "3.10"
|
|
|
|
- name: Install Ollama
|
|
run: |
|
|
curl -fsSL https://ollama.com/install.sh | sh
|
|
|
|
- name: Pull Ollama image
|
|
run: |
|
|
ollama pull llama3.2:3b-instruct-fp16
|
|
|
|
- name: Start Ollama in background
|
|
run: |
|
|
nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &
|
|
|
|
- name: Set Up Environment and Install Dependencies
|
|
run: |
|
|
uv sync --extra dev --extra test
|
|
uv pip install ollama faiss-cpu
|
|
uv pip install -e .
|
|
|
|
- name: Wait for Ollama to start
|
|
run: |
|
|
echo "Waiting for Ollama..."
|
|
for i in {1..30}; do
|
|
if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
|
|
echo "Ollama is running!"
|
|
exit 0
|
|
fi
|
|
sleep 1
|
|
done
|
|
echo "Ollama failed to start"
|
|
ollama ps
|
|
ollama.log
|
|
exit 1
|
|
|
|
- name: Start Llama Stack server in background
|
|
env:
|
|
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
|
|
run: |
|
|
source .venv/bin/activate
|
|
nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
|
|
|
|
- name: Wait for Llama Stack server to be ready
|
|
run: |
|
|
echo "Waiting for Llama Stack server..."
|
|
for i in {1..30}; do
|
|
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
|
|
echo " Llama Stack server is up!"
|
|
exit 0
|
|
fi
|
|
sleep 1
|
|
done
|
|
echo " Llama Stack server failed to start"
|
|
cat server.log
|
|
exit 1
|
|
|
|
- name: Run Inference Integration Tests
|
|
env:
|
|
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
|
|
run: |
|
|
uv run pytest -v tests/integration/inference --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
|