diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index 6e7e99ef9..e03c7401c 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -25,7 +25,8 @@ jobs:
       matrix:
         # Listing tests manually since some of them currently fail
         # TODO: generate matrix list from tests/integration when fixed
-        test-type: [inference, datasets, inspect, scoring, post_training, providers]
+        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers]
+        client-type: [library, http]
       fail-fast: false # we want to run all tests regardless of failure
 
     steps:
@@ -54,6 +55,8 @@
           uv sync --extra dev --extra test
           uv pip install ollama faiss-cpu
           # always test against the latest version of the client
+          # TODO: this is not necessarily a good idea. we need to test against both published and latest
+          # to find out backwards compatibility issues.
           uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
           uv pip install -e .
           llama stack build --template ollama --image-type venv
@@ -74,6 +77,7 @@
             exit 1
 
       - name: Start Llama Stack server in background
+        if: matrix.client-type == 'http'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
@@ -81,6 +85,7 @@
           nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
 
       - name: Wait for Llama Stack server to be ready
+        if: matrix.client-type == 'http'
         run: |
           echo "Waiting for Llama Stack server..."
           for i in {1..30}; do
@@ -98,4 +103,12 @@
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
+          if [ "${{ matrix.client-type }}" == "library" ]; then
+            stack_config="ollama"
+          else
+            stack_config="http://localhost:8321"
+          fi
+          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
+            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
+            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
+            --embedding-model=all-MiniLM-L6-v2
diff --git a/tests/integration/inference/test_text_inference.py b/tests/integration/inference/test_text_inference.py
index f558254e5..c1eec35f0 100644
--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@@ -275,6 +275,7 @@ def test_text_chat_completion_first_token_profiling(client_with_models, text_mod
         model_id=text_model_id,
         messages=messages,
         stream=False,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     message_content = response.completion_message.content.lower().strip()
     assert len(message_content) > 0
@@ -301,6 +302,7 @@ def test_text_chat_completion_streaming(client_with_models, text_model_id, test_
         model_id=text_model_id,
         messages=[{"role": "user", "content": question}],
         stream=True,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
     assert len(streamed_content) > 0
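
The new `client-type` matrix dimension runs the same integration suites through two paths: a library client, where `--stack-config` names a template and the stack is built in-process, and an HTTP client, where pytest talks to a server started with `llama stack run`. As a rough local reproduction of what the workflow does above, assuming Ollama is already serving `meta-llama/Llama-3.2-3B-Instruct` and the venv was built with `llama stack build --template ollama --image-type venv`, and using `inference` as one example value of the `test-type` matrix, the two invocations look like this sketch:

```sh
# Library-client mode: --stack-config names the "ollama" template, so the stack
# runs inside the pytest process; no server is needed.
uv run pytest -v tests/integration/inference --stack-config=ollama \
  -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2

# HTTP-client mode: start the server first (same command as the workflow step),
# then point --stack-config at its URL.
nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
uv run pytest -v tests/integration/inference --stack-config=http://localhost:8321 \
  -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2
```

The `-k` expression mirrors the filter added in the workflow, which deselects tests the CI run does not currently exercise (builtin tools, image safety, code interpreter, and RAG).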