test: make sure integration tests run against the server (#1743)

Previously, the integration tests started the server but never actually
exercised it, because `--stack-config=ollama` uses the ollama template with the
inline "llama stack as library" client rather than the HTTP client.

This PR makes sure we test it both ways.
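
For reference, a minimal sketch of how a conftest fixture could pick between the two clients based on `--stack-config` (the option wiring, import paths, and `client_with_models` body below are illustrative assumptions, not the repo's exact fixture):

```python
# Illustrative conftest.py sketch -- not the repo's actual fixture.
import pytest


def pytest_addoption(parser):
    # A template name ("ollama") selects the inline library client;
    # an http://host:port value selects the HTTP client against a running server.
    parser.addoption("--stack-config", default="ollama")


@pytest.fixture(scope="session")
def client_with_models(request):
    stack_config = request.config.getoption("--stack-config")
    if stack_config.startswith("http"):
        # Talk to a server started with `llama stack run` over HTTP.
        from llama_stack_client import LlamaStackClient

        return LlamaStackClient(base_url=stack_config)
    # Otherwise run the whole stack in-process from the named template.
    # Import path is an assumption about the library client's location.
    from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

    client = LlamaStackAsLibraryClient(stack_config)
    client.initialize()
    return client
```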

We also add agents tests to the mix.

## Test Plan 

GitHub CI

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Sébastien Han <seb@redhat.com>

@@ -275,6 +275,7 @@ def test_text_chat_completion_first_token_profiling(client_with_models, text_mod
         model_id=text_model_id,
         messages=messages,
         stream=False,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     message_content = response.completion_message.content.lower().strip()
     assert len(message_content) > 0
@@ -301,6 +302,7 @@ def test_text_chat_completion_streaming(client_with_models, text_model_id, test_
         model_id=text_model_id,
         messages=[{"role": "user", "content": question}],
         stream=True,
+        timeout=120,  # Increase timeout to 2 minutes for large conversation history
     )
     streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
     assert len(streamed_content) > 0
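
For context, both hunks land inside the `chat_completion` kwargs. A minimal sketch of how the non-streaming call reads after the patch, assuming the `client.inference.chat_completion` method path of llama-stack-client (the model id and prompt below are placeholder values, not taken from the diff):

```python
# Sketch of the patched call; model id and prompt are example values only.
text_model_id = "meta-llama/Llama-3.2-3B-Instruct"

response = client_with_models.inference.chat_completion(
    model_id=text_model_id,
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=False,
    timeout=120,  # seconds; generous enough for a server reached over HTTP
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
```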