forked from phoenix-oss/llama-stack-mirror
test: make sure integration tests run against the server (#1743)
Previously, the integration tests started the server but never really used it, because `--stack-config=ollama` uses the ollama template and the inline "llama stack as library" client, not the HTTP client. This PR makes sure we test it both ways. We also add agents tests to the mix.

## Test Plan

GitHub

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
parent 2ffa2b77ed
commit b440a1dc42
2 changed files with 17 additions and 2 deletions

.github/workflows/integration-tests.yml (vendored): 17 changes
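The description's distinction between the two client modes comes down to how the stack is reached; the models and test directories are the same. A rough local equivalent of the two invocations, using the same flags as the workflow changes below (a sketch only; it assumes ollama is installed and already serving the model):

# Library mode: pytest builds the stack in-process from the ollama template
uv run pytest -v tests/integration/inference \
  --stack-config=ollama \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2

# HTTP mode: start a server in the background, then point the same tests at it
nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &
uv run pytest -v tests/integration/inference \
  --stack-config=http://localhost:8321 \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2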
@@ -25,7 +25,8 @@ jobs:
       matrix:
         # Listing tests manually since some of them currently fail
         # TODO: generate matrix list from tests/integration when fixed
-        test-type: [inference, datasets, inspect, scoring, post_training, providers]
+        test-type: [agents, inference, datasets, inspect, scoring, post_training, providers]
+        client-type: [library, http]
       fail-fast: false # we want to run all tests regardless of failure

     steps:
@@ -54,6 +55,8 @@ jobs:
           uv sync --extra dev --extra test
           uv pip install ollama faiss-cpu
           # always test against the latest version of the client
+          # TODO: this is not necessarily a good idea. we need to test against both published and latest
+          # to find out backwards compatibility issues.
           uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
           uv pip install -e .
           llama stack build --template ollama --image-type venv
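The TODO added above flags a coverage gap rather than a bug: installing the client from git main means the published wheel is never exercised. One way it could eventually be addressed (purely illustrative, not part of this PR; CLIENT_VERSION is a hypothetical matrix variable) is to switch the install source per job:

# hypothetical CLIENT_VERSION switch between the released client and git HEAD
if [ "${CLIENT_VERSION:-latest}" == "published" ]; then
  uv pip install llama-stack-client   # released wheel from PyPI (assumed package name)
else
  uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
fi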
@@ -74,6 +77,7 @@ jobs:
             exit 1

       - name: Start Llama Stack server in background
+        if: matrix.client-type == 'http'
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
@@ -81,6 +85,7 @@ jobs:
           nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv > server.log 2>&1 &

       - name: Wait for Llama Stack server to be ready
+        if: matrix.client-type == 'http'
         run: |
           echo "Waiting for Llama Stack server..."
           for i in {1..30}; do
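The body of that readiness loop sits outside this hunk. A minimal sketch of the pattern it follows (the /v1/health endpoint and the one-second poll interval are assumptions, not shown in this diff):

for i in {1..30}; do
  # stop polling as soon as the server answers
  if curl -sf http://localhost:8321/v1/health > /dev/null; then
    echo "Llama Stack server is up"
    break
  fi
  # give up after 30 attempts and surface the server log for debugging
  if [ "$i" -eq 30 ]; then
    echo "Llama Stack server did not become ready in time" >&2
    cat server.log
    exit 1
  fi
  sleep 1
done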
@@ -98,4 +103,12 @@ jobs:
         env:
           INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
         run: |
-          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2
+          if [ "${{ matrix.client-type }}" == "library" ]; then
+            stack_config="ollama"
+          else
+            stack_config="http://localhost:8321"
+          fi
+          uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=${stack_config} \
+            -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
+            --text-model="meta-llama/Llama-3.2-3B-Instruct" \
+            --embedding-model=all-MiniLM-L6-v2
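Put together, one http-leg job (for example test-type=agents) ends up running roughly the following after GitHub Actions substitutes the matrix expressions (a sketch; agents is just one of the seven test-type values):

uv run pytest -v tests/integration/agents \
  --stack-config=http://localhost:8321 \
  -k "not(builtin_tool or safety_with_image or code_interpreter or test_rag)" \
  --text-model="meta-llama/Llama-3.2-3B-Instruct" \
  --embedding-model=all-MiniLM-L6-v2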
Second changed file (chat completion integration tests):

@@ -275,6 +275,7 @@ def test_text_chat_completion_first_token_profiling(client_with_models, text_mod
         model_id=text_model_id,
         messages=messages,
         stream=False,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     message_content = response.completion_message.content.lower().strip()
     assert len(message_content) > 0
@@ -301,6 +302,7 @@ def test_text_chat_completion_streaming(client_with_models, text_model_id, test_
         model_id=text_model_id,
         messages=[{"role": "user", "content": question}],
         stream=True,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
     assert len(streamed_content) > 0