feat(ci): use replay mode, setup ollama if specific label exists on PR (#2955)

This PR makes setting up Ollama optional for CI. By default, we run
inference requests in `replay` mode, serving stored responses from the
`tests/integration/recordings/` directory.
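
A minimal sketch of what the replay step could look like in the workflow. The environment variable and recording-directory names here are assumptions for illustration, not necessarily the harness's actual keys:

```yaml
# Hedged sketch: run integration tests against stored recordings.
# LLAMA_STACK_TEST_INFERENCE_MODE / LLAMA_STACK_TEST_RECORDING_DIR are
# assumed names for the harness's mode and recording-location switches.
- name: Run integration tests (replay mode)
  env:
    LLAMA_STACK_TEST_INFERENCE_MODE: replay
    LLAMA_STACK_TEST_RECORDING_DIR: tests/integration/recordings
  run: uv run pytest -sv tests/integration
```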

Every so often, users will update tests in a way that requires re-recording.
To handle this, we check for the existence of a `re-record-tests` label on
the PR. If detected,
- Ollama is spun up
- inference mode is set to `record`
- after the tests are done, if any new recordings are detected, they are
  pushed back to the PR (see the workflow sketch after this list)
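
A hedged sketch of how the label gate and push-back could be wired up in GitHub Actions. Only the `re-record-tests` label name comes from this PR; the job layout, step contents, env var name, and commit message are illustrative, and checkout/credential configuration for the push is elided:

```yaml
# Sketch only: gate Ollama setup and recording on the PR label.
jobs:
  integration-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Start Ollama (re-record only)
        if: contains(github.event.pull_request.labels.*.name, 're-record-tests')
        run: |
          curl -fsSL https://ollama.com/install.sh | sh
          ollama serve &

      - name: Run integration tests
        env:
          # `record` hits Ollama and saves responses; `replay` uses stored ones.
          # The variable name is an assumption, as above.
          LLAMA_STACK_TEST_INFERENCE_MODE: ${{ contains(github.event.pull_request.labels.*.name, 're-record-tests') && 'record' || 'replay' }}
        run: uv run pytest -sv tests/integration

      - name: Push new recordings back to the PR (re-record only)
        if: contains(github.event.pull_request.labels.*.name, 're-record-tests')
        run: |
          git add tests/integration/recordings
          if ! git diff --cached --quiet; then
            git commit -m "Update test recordings"
            git push
          fi
```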

## Test Plan

This is GitHub CI. Gotta test it live.
Ashwin Bharambe, 2025-07-29 16:50:26 -07:00 (committed by GitHub)
commit b237df8f18 (parent 0ac503ec0d)
11 changed files with 1519 additions and 13 deletions


Excerpt of the test changes: `test_mcp_invocation` now takes the `text_model_id` fixture instead of selecting a model itself, so the `ModelType` import and the model-selection block are removed.

```diff
@@ -10,7 +10,6 @@ import pytest
 from llama_stack_client import Agent
 
 from llama_stack import LlamaStackAsLibraryClient
-from llama_stack.apis.models import ModelType
 from llama_stack.distribution.datatypes import AuthenticationRequiredError
 
 AUTH_TOKEN = "test-token"
@@ -24,7 +23,7 @@ def mcp_server():
     yield mcp_server_info
 
 
-def test_mcp_invocation(llama_stack_client, mcp_server):
+def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
     if not isinstance(llama_stack_client, LlamaStackAsLibraryClient):
         pytest.skip("The local MCP server only reliably reachable from library client.")
 
@@ -69,14 +68,10 @@ def test_mcp_invocation(llama_stack_client, mcp_server):
     assert content[0].type == "text"
     assert content[0].text == "Hello, world!"
 
-    models = [
-        m for m in llama_stack_client.models.list() if m.model_type == ModelType.llm and "guard" not in m.identifier
-    ]
-    model_id = models[0].identifier
-    print(f"Using model: {model_id}")
+    print(f"Using model: {text_model_id}")
     agent = Agent(
         client=llama_stack_client,
-        model=model_id,
+        model=text_model_id,
         instructions="You are a helpful assistant.",
         tools=[test_toolgroup_id],
     )
```