mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-26 06:07:43 +00:00
The `/v1/providers` now reports the health status of each provider when implemented. ``` curl -L http://127.0.0.1:8321/v1/providers|jq % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 4072 100 4072 0 0 246k 0 --:--:-- --:--:-- --:--:-- 248k { "data": [ { "api": "inference", "provider_id": "ollama", "provider_type": "remote::ollama", "config": { "url": "http://localhost:11434" }, "health": { "status": "OK" } }, { "api": "vector_io", "provider_id": "faiss", "provider_type": "inline::faiss", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/faiss_store.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "safety", "provider_id": "llama-guard", "provider_type": "inline::llama-guard", "config": { "excluded_categories": [] }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "agents", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "persistence_store": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/agents_store.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "telemetry", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "service_name": "llama-stack", "sinks": "console,sqlite", "sqlite_db_path": "/Users/leseb/.llama/distributions/ollama/trace_store.db" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "eval", "provider_id": "meta-reference", "provider_type": "inline::meta-reference", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/meta_reference_eval.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "datasetio", "provider_id": "huggingface", "provider_type": "remote::huggingface", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/huggingface_datasetio.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "datasetio", "provider_id": "localfs", "provider_type": "inline::localfs", "config": { "kvstore": { "type": "sqlite", "namespace": null, "db_path": "/Users/leseb/.llama/distributions/ollama/localfs_datasetio.db" } }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "basic", "provider_type": "inline::basic", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "llm-as-judge", "provider_type": "inline::llm-as-judge", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "scoring", "provider_id": "braintrust", "provider_type": "inline::braintrust", "config": { "openai_api_key": "********" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "brave-search", "provider_type": "remote::brave-search", "config": { "api_key": "********", "max_results": 3 }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "tavily-search", "provider_type": "remote::tavily-search", "config": { "api_key": "********", "max_results": 3 }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "code-interpreter", "provider_type": "inline::code-interpreter", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "rag-runtime", "provider_type": "inline::rag-runtime", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "model-context-protocol", "provider_type": "remote::model-context-protocol", "config": {}, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } }, { "api": "tool_runtime", "provider_id": "wolfram-alpha", "provider_type": "remote::wolfram-alpha", "config": { "api_key": "********" }, "health": { "status": "Not Implemented", "message": "Provider does not implement health check" } } ] } ``` Per providers too: ``` curl -L http://127.0.0.1:8321/v1/providers/ollama {"api":"inference","provider_id":"ollama","provider_type":"remote::ollama","config":{"url":"http://localhost:11434"},"health":{"status":"OK"}} ``` Signed-off-by: Sébastien Han <seb@redhat.com>
125 lines
4.2 KiB
YAML
125 lines
4.2 KiB
YAML
name: Integration Tests
|
|
|
|
on:
|
|
push:
|
|
branches: [ main ]
|
|
pull_request:
|
|
branches: [ main ]
|
|
paths:
|
|
- 'distributions/**'
|
|
- 'llama_stack/**'
|
|
- 'tests/integration/**'
|
|
- 'uv.lock'
|
|
- 'pyproject.toml'
|
|
- 'requirements.txt'
|
|
- '.github/workflows/integration-tests.yml' # This workflow
|
|
|
|
concurrency:
|
|
group: ${{ github.workflow }}-${{ github.ref }}
|
|
cancel-in-progress: true
|
|
|
|
jobs:
|
|
test-matrix:
|
|
runs-on: ubuntu-latest
|
|
strategy:
|
|
matrix:
|
|
# Listing tests manually since some of them currently fail
|
|
# TODO: generate matrix list from tests/integration when fixed
|
|
test-type: [agents, inference, datasets, inspect, scoring, post_training, providers]
|
|
client-type: [library, http]
|
|
fail-fast: false # we want to run all tests regardless of failure
|
|
|
|
steps:
|
|
- name: Checkout repository
|
|
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
|
|
|
- name: Install uv
|
|
uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 # v5.4.0
|
|
with:
|
|
python-version: "3.10"
|
|
|
|
- name: Install Ollama
|
|
run: |
|
|
curl -fsSL https://ollama.com/install.sh | sh
|
|
|
|
- name: Pull Ollama image
|
|
run: |
|
|
ollama pull llama3.2:3b-instruct-fp16
|
|
|
|
- name: Start Ollama in background
|
|
run: |
|
|
nohup ollama run llama3.2:3b-instruct-fp16 > ollama.log 2>&1 &
|
|
|
|
- name: Set Up Environment and Install Dependencies
|
|
run: |
|
|
uv sync --extra dev --extra test
|
|
uv pip install ollama faiss-cpu
|
|
# always test against the latest version of the client
|
|
# TODO: this is not necessarily a good idea. we need to test against both published and latest
|
|
# to find out backwards compatibility issues.
|
|
uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
|
|
uv pip install -e .
|
|
llama stack build --template ollama --image-type venv
|
|
|
|
- name: Wait for Ollama to start
|
|
run: |
|
|
echo "Waiting for Ollama..."
|
|
for i in {1..30}; do
|
|
if curl -s http://localhost:11434 | grep -q "Ollama is running"; then
|
|
echo "Ollama is running!"
|
|
exit 0
|
|
fi
|
|
sleep 1
|
|
done
|
|
echo "Ollama failed to start"
|
|
ollama ps
|
|
ollama.log
|
|
exit 1
|
|
|
|
- name: Start Llama Stack server in background
|
|
if: matrix.client-type == 'http'
|
|
env:
|
|
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
|
|
run: |
|
|
source .venv/bin/activate
|
|
nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
|
|
|
|
- name: Wait for Llama Stack server to be ready
|
|
if: matrix.client-type == 'http'
|
|
run: |
|
|
echo "Waiting for Llama Stack server..."
|
|
for i in {1..30}; do
|
|
if curl -s http://localhost:8321/v1/health | grep -q "OK"; then
|
|
echo "Llama Stack server is up!"
|
|
exit 0
|
|
fi
|
|
sleep 1
|
|
done
|
|
echo "Llama Stack server failed to start"
|
|
cat server.log
|
|
exit 1
|
|
|
|
- name: Verify Ollama status is OK
|
|
if: matrix.client-type == 'http'
|
|
run: |
|
|
echo "Verifying Ollama status..."
|
|
ollama_status=$(curl -s -L http://127.0.0.1:8321/v1/providers/ollama|jq --raw-output .health.status)
|
|
echo "Ollama status: $ollama_status"
|
|
if [ "$ollama_status" != "OK" ]; then
|
|
echo "Ollama health check failed"
|
|
exit 1
|
|
fi
|
|
|
|
- name: Run Integration Tests
|
|
env:
|
|
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
|
|
run: |
|
|
if [ "${{ matrix.client-type }}" == "library" ]; then
|
|
stack_config="ollama"
|
|
else
|
|
stack_config="http://localhost:8321"
|
|
fi
|
|
uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
|
|
-k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
|
|
--text-model="meta-llama/Llama-3.2-3B-Instruct" \
|
|
--embedding-model=all-MiniLM-L6-v2
|