feat(ci): add support for running vision inference tests (#2972)

This PR significantly refactors the Integration Tests workflow. The main
goal behind the PR was to enable recording of vision tests, which had
never been run as part of our CI before. During debugging, I ended up
making several other changes, refactoring the workflow and hopefully
increasing its robustness.

After doing the experiments, I have updated the trigger event to be
`pull_request_target` so that this workflow gets write permissions by
default. Note, however, that it will run only with source code from the
base (main) branch in the source repository. If you do change the
workflow, you will need to experiment using the `workflow_dispatch`
trigger. This should not be news to anyone using GitHub Actions (except me!).

Things are likely to be a little rocky while I learn more about GitHub
Actions, etc. Please be patient :)

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
Ashwin Bharambe 2025-07-31 11:50:42 -07:00 committed by GitHub
parent 709c974bd8
commit 27d866795c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
108 changed files with 13985 additions and 15254 deletions

View file

@ -25,12 +25,6 @@ def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
# suffix includes the ., so we remove it
return f"data:image/{image_path.suffix[1:]};base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
message = {
"role": "user",
@ -78,7 +72,9 @@ def multi_image_data():
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
supported_models = ["llama-4", "gpt-4o", "llama4"]
if not any(model in vision_model_id.lower() for model in supported_models):
pytest.skip(f"Skip for non-supported model: {vision_model_id}")
pytest.skip(
f"Skip since multi-image tests are only supported for {supported_models}, not for {vision_model_id}"
)
messages = [
{
@ -183,24 +179,13 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data, base64_image_url, type_):
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
image_spec = {
"url": {
"type": "image",
"image": {
"url": {
"uri": base64_image_url,
},
},
"type": "image",
"image": {
"data": base64_image_data,
},
"data": {
"type": "image",
"image": {
"data": base64_image_data,
},
},
}[type_]
}
message = {
"role": "user",