llama-stack-mirror/tests/integration/batches/test_batches.py
Varsha 32fde8d9a8
feat: Add /v1/embeddings endpoint to batches API (#3384)
# What does this PR do?
This PR extends the Llama Stack Batches API to support the
/v1/embeddings endpoint, enabling efficient batch processing of
embedding requests alongside the existing /v1/chat/completions and
/v1/completions support.

<!-- If resolving an issue, uncomment and update the line below -->
<!-- Closes #[issue-number] -->
Closes: https://github.com/llamastack/llama-stack/issues/3145

## Test Plan
<!-- Describe the tests you ran to verify your changes with result
summaries. *Provide clear instructions so the plan can be easily
re-executed.* -->
```
(stack-client) ➜  llama-stack git:(support/embeddings-api) conda activate stack-client && python -m pytest tests/unit/providers/batches/test_reference.py -v                             
============================================================================================================================================ test session starts =============================================================================================================================================
platform darwin -- Python 3.12.11, pytest-7.4.4, pluggy-1.5.0 -- /Users/vnarsing/miniconda3/envs/stack-client/bin/python
cachedir: .pytest_cache
metadata: {'Python': '3.12.11', 'Platform': 'macOS-15.6.1-arm64-arm-64bit', 'Packages': {'pytest': '7.4.4', 'pluggy': '1.5.0'}, 'Plugins': {'asyncio': '0.23.8', 'cov': '6.0.0', 'timeout': '2.2.0', 'socket': '0.7.0', 'xdist': '3.8.0', 'html': '3.1.1', 'langsmith': '0.3.39', 'anyio': '4.8.0', 'metadata': '3.0.0'}}
rootdir: /Users/vnarsing/go/src/github/meta-llama/llama-stack
configfile: pyproject.toml
plugins: asyncio-0.23.8, cov-6.0.0, timeout-2.2.0, socket-0.7.0, xdist-3.8.0, html-3.1.1, langsmith-0.3.39, anyio-4.8.0, metadata-3.0.0
asyncio: mode=Mode.AUTO
collected 46 items                                                                                                                                                                                                                                                                                           

tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_and_retrieve_batch_success PASSED                                                                                                                                                                                [  2%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_without_metadata PASSED                                                                                                                                                                                    [  4%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_completion_window PASSED                                                                                                                                                                                   [  6%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_endpoints[/v1/invalid/endpoint] PASSED                                                                                                                                                             [  8%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_endpoints[] PASSED                                                                                                                                                                                 [ 10%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_invalid_metadata PASSED                                                                                                                                                                                    [ 13%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_retrieve_batch_not_found PASSED                                                                                                                                                                                         [ 15%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_success PASSED                                                                                                                                                                                             [ 17%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[failed] PASSED                                                                                                                                                                            [ 19%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[expired] PASSED                                                                                                                                                                           [ 21%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_invalid_statuses[completed] PASSED                                                                                                                                                                         [ 23%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_cancel_batch_not_found PASSED                                                                                                                                                                                           [ 26%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_empty PASSED                                                                                                                                                                                               [ 28%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_single_batch PASSED                                                                                                                                                                                        [ 30%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_multiple_batches PASSED                                                                                                                                                                                    [ 32%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_with_limit PASSED                                                                                                                                                                                          [ 34%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_with_pagination PASSED                                                                                                                                                                                     [ 36%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_list_batches_invalid_after PASSED                                                                                                                                                                                       [ 39%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_kvstore_persistence PASSED                                                                                                                                                                                              [ 41%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_not_found PASSED                                                                                                                                                                                    [ 43%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_exists_empty_content PASSED                                                                                                                                                                         [ 45%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_file_mixed_valid_invalid_json PASSED                                                                                                                                                                     [ 47%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_model PASSED                                                                                                                                                                                     [ 50%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[custom_id-custom_id-missing_required_parameter-Missing required parameter: custom_id] PASSED                                                                         [ 52%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[method-method-missing_required_parameter-Missing required parameter: method] PASSED                                                                                  [ 54%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[url-url-missing_required_parameter-Missing required parameter: url] PASSED                                                                                           [ 56%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[body-body-missing_required_parameter-Missing required parameter: body] PASSED                                                                                        [ 58%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[model-body.model-invalid_request-Model parameter is required] PASSED                                                                                                 [ 60%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_chat_completions[messages-body.messages-invalid_request-Messages parameter is required] PASSED                                                                                        [ 63%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[custom_id-custom_id-missing_required_parameter-Missing required parameter: custom_id] PASSED                                                                              [ 65%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[method-method-missing_required_parameter-Missing required parameter: method] PASSED                                                                                       [ 67%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[url-url-missing_required_parameter-Missing required parameter: url] PASSED                                                                                                [ 69%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[body-body-missing_required_parameter-Missing required parameter: body] PASSED                                                                                             [ 71%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[model-body.model-invalid_request-Model parameter is required] PASSED                                                                                                      [ 73%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_missing_parameters_completions[prompt-body.prompt-invalid_request-Prompt parameter is required] PASSED                                                                                                   [ 76%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_url_mismatch PASSED                                                                                                                                                                                      [ 78%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_multiple_errors_per_request PASSED                                                                                                                                                                       [ 80%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_request_format PASSED                                                                                                                                                                            [ 82%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[custom_id-custom_id-12345-Custom_id must be a string] PASSED                                                                                                                     [ 84%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[url-url-123-URL must be a string] PASSED                                                                                                                                         [ 86%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[method-method-invalid_value2-Method must be a string] PASSED                                                                                                                     [ 89%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[body-body-invalid_value3-Body must be a JSON dictionary object] PASSED                                                                                                           [ 91%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[model-body.model-123-Model must be a string] PASSED                                                                                                                              [ 93%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_validate_input_invalid_parameter_types[messages-body.messages-invalid messages format-Messages must be an array] PASSED                                                                                                 [ 95%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_max_concurrent_batches PASSED                                                                                                                                                                                           [ 97%]
tests/unit/providers/batches/test_reference.py::TestReferenceBatchesImpl::test_create_batch_embeddings_endpoint PASSED                                                                                                                                                                                 [100%]

```

---------

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
2025-10-10 13:25:58 -07:00

414 lines
16 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Integration tests for the Llama Stack batch processing functionality.
This module contains comprehensive integration tests for the batch processing API,
using the OpenAI-compatible client interface for consistency.
Test Categories:
1. Core Batch Operations:
- test_batch_creation_and_retrieval: Comprehensive batch creation, structure validation, and retrieval
- test_batch_listing: Basic batch listing functionality
- test_batch_immediate_cancellation: Batch cancellation workflow
# TODO: cancel during processing
2. End-to-End Processing:
- test_batch_e2e_chat_completions: Full chat completions workflow with output and error validation
Note: Error conditions and edge cases are primarily tested in test_batches_errors.py
for better organization and separation of concerns.
CLEANUP WARNING: These tests currently create batches that are not automatically
cleaned up after test completion. This may lead to resource accumulation over
multiple test runs. Only test_batch_immediate_cancellation properly cancels its batch.
The test_batch_e2e_chat_completions test does clean up its output and error files.
"""
import json
class TestBatchesIntegration:
"""Integration tests for the batches API."""
def test_batch_creation_and_retrieval(self, openai_client, batch_helper, text_model_id):
"""Test comprehensive batch creation and retrieval scenarios."""
test_metadata = {
"test_type": "comprehensive",
"purpose": "creation_and_retrieval_test",
"version": "1.0",
"tags": "test,batch",
}
batch_requests = [
{
"custom_id": "request-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests, "batch_creation_test") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
metadata=test_metadata,
)
assert batch.endpoint == "/v1/chat/completions"
assert batch.input_file_id == uploaded_file.id
assert batch.completion_window == "24h"
assert batch.metadata == test_metadata
retrieved_batch = openai_client.batches.retrieve(batch.id)
assert retrieved_batch.id == batch.id
assert retrieved_batch.object == batch.object
assert retrieved_batch.endpoint == batch.endpoint
assert retrieved_batch.input_file_id == batch.input_file_id
assert retrieved_batch.completion_window == batch.completion_window
assert retrieved_batch.metadata == batch.metadata
def test_batch_listing(self, openai_client, batch_helper, text_model_id):
"""
Test batch listing.
This test creates multiple batches and verifies that they can be listed.
It also deletes the input files before execution, which means the batches
will appear as failed due to missing input files. This is expected and
a good thing, because it means no inference is performed.
"""
batch_ids = []
for i in range(2):
batch_requests = [
{
"custom_id": f"request-{i}",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": f"Hello {i}"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests, f"batch_input_{i}") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
batch_ids.append(batch.id)
batch_list = openai_client.batches.list()
assert isinstance(batch_list.data, list)
listed_batch_ids = {b.id for b in batch_list.data}
for batch_id in batch_ids:
assert batch_id in listed_batch_ids
def test_batch_immediate_cancellation(self, openai_client, batch_helper, text_model_id):
"""Test immediate batch cancellation."""
batch_requests = [
{
"custom_id": "request-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
# hopefully cancel the batch before it completes
cancelling_batch = openai_client.batches.cancel(batch.id)
assert cancelling_batch.status in ["cancelling", "cancelled"]
assert isinstance(cancelling_batch.cancelling_at, int), (
f"cancelling_at should be int, got {type(cancelling_batch.cancelling_at)}"
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60, # often takes 10-11 minutes, give it 3 min
expected_statuses={"cancelled"},
timeout_action="skip",
)
assert final_batch.status == "cancelled"
assert isinstance(final_batch.cancelled_at, int), (
f"cancelled_at should be int, got {type(final_batch.cancelled_at)}"
)
def test_batch_e2e_chat_completions(self, openai_client, batch_helper, text_model_id):
"""Test end-to-end batch processing for chat completions with both successful and failed operations."""
batch_requests = [
{
"custom_id": "success-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Say hello"}],
"max_tokens": 20,
},
},
{
"custom_id": "error-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"rolez": "user", "contentz": "This should fail"}], # Invalid keys to trigger error
# note: ollama does not validate max_tokens values or the "role" key, so they won't trigger an error
},
},
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
metadata={"test": "e2e_success_and_errors_test"},
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60, # often takes 2-3 minutes
expected_statuses={"completed"},
timeout_action="skip",
)
# Expecting a completed batch with both successful and failed requests
# Batch(id='batch_xxx',
# completion_window='24h',
# created_at=...,
# endpoint='/v1/chat/completions',
# input_file_id='file-xxx',
# object='batch',
# status='completed',
# output_file_id='file-xxx',
# error_file_id='file-xxx',
# request_counts=BatchRequestCounts(completed=1, failed=1, total=2))
assert final_batch.status == "completed"
assert final_batch.request_counts is not None
assert final_batch.request_counts.total == 2
assert final_batch.request_counts.completed == 1
assert final_batch.request_counts.failed == 1
assert final_batch.output_file_id is not None, "Output file should exist for successful requests"
output_content = openai_client.files.content(final_batch.output_file_id)
if isinstance(output_content, str):
output_text = output_content
else:
output_text = output_content.content.decode("utf-8")
output_lines = output_text.strip().split("\n")
for line in output_lines:
result = json.loads(line)
assert "id" in result
assert "custom_id" in result
assert result["custom_id"] == "success-1"
assert "response" in result
assert result["response"]["status_code"] == 200
assert "body" in result["response"]
assert "choices" in result["response"]["body"]
assert final_batch.error_file_id is not None, "Error file should exist for failed requests"
error_content = openai_client.files.content(final_batch.error_file_id)
if isinstance(error_content, str):
error_text = error_content
else:
error_text = error_content.content.decode("utf-8")
error_lines = error_text.strip().split("\n")
for line in error_lines:
result = json.loads(line)
assert "id" in result
assert "custom_id" in result
assert result["custom_id"] == "error-1"
assert "error" in result
error = result["error"]
assert error is not None
assert "code" in error or "message" in error, "Error should have code or message"
deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
assert deleted_output_file.deleted, f"Output file {final_batch.output_file_id} was not deleted successfully"
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
assert deleted_error_file.deleted, f"Error file {final_batch.error_file_id} was not deleted successfully"
def test_batch_e2e_completions(self, openai_client, batch_helper, text_model_id):
"""Run an end-to-end batch with a single successful text completion request."""
request_body = {"model": text_model_id, "prompt": "Say completions", "max_tokens": 20}
batch_requests = [
{
"custom_id": "success-1",
"method": "POST",
"url": "/v1/completions",
"body": request_body,
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/completions",
completion_window="24h",
metadata={"test": "e2e_completions_success"},
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60,
expected_statuses={"completed"},
timeout_action="skip",
)
assert final_batch.status == "completed"
assert final_batch.request_counts is not None
assert final_batch.request_counts.total == 1
assert final_batch.request_counts.completed == 1
assert final_batch.output_file_id is not None
output_content = openai_client.files.content(final_batch.output_file_id)
if isinstance(output_content, str):
output_text = output_content
else:
output_text = output_content.content.decode("utf-8")
output_lines = output_text.strip().split("\n")
assert len(output_lines) == 1
result = json.loads(output_lines[0])
assert result["custom_id"] == "success-1"
assert "response" in result
assert result["response"]["status_code"] == 200
deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
assert deleted_output_file.deleted
if final_batch.error_file_id is not None:
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
assert deleted_error_file.deleted
def test_batch_e2e_embeddings(self, openai_client, batch_helper, embedding_model_id):
"""Run an end-to-end batch with embeddings requests including both string and list inputs."""
batch_requests = [
{
"custom_id": "success-1",
"method": "POST",
"url": "/v1/embeddings",
"body": {"model": embedding_model_id, "input": "Hello world", "encoding_format": "float"},
},
{
"custom_id": "success-2",
"method": "POST",
"url": "/v1/embeddings",
"body": {
"model": embedding_model_id,
"input": ["How are you?", "Good morning", "Have a great day"],
"encoding_format": "float",
},
},
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/embeddings",
completion_window="24h",
metadata={"test": "e2e_embeddings_success"},
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60,
expected_statuses={"completed"},
timeout_action="skip",
)
assert final_batch.status == "completed"
assert final_batch.request_counts is not None
assert final_batch.request_counts.total == 2
assert final_batch.request_counts.completed == 2
assert final_batch.output_file_id is not None
output_content = openai_client.files.content(final_batch.output_file_id)
if isinstance(output_content, str):
output_text = output_content
else:
output_text = output_content.content.decode("utf-8")
output_lines = output_text.strip().split("\n")
assert len(output_lines) == 2
# Check first result (string input)
result1 = json.loads(output_lines[0])
assert result1["custom_id"] in ["success-1", "success-2"]
assert "response" in result1
assert result1["response"]["status_code"] == 200
# Verify the response body contains embeddings data
response_body1 = json.loads(result1["response"]["body"])
assert response_body1["object"] == "list"
assert "data" in response_body1
assert len(response_body1["data"]) == 1
assert "embedding" in response_body1["data"][0]
assert "index" in response_body1["data"][0]
assert response_body1["data"][0]["index"] == 0
# Check second result (list input)
result2 = json.loads(output_lines[1])
assert result2["custom_id"] in ["success-1", "success-2"]
assert "response" in result2
assert result2["response"]["status_code"] == 200
# Verify the response body contains embeddings data for list input
response_body2 = json.loads(result2["response"]["body"])
assert response_body2["object"] == "list"
assert "data" in response_body2
assert len(response_body2["data"]) == 3 # Three strings in the list
for i, embedding_data in enumerate(response_body2["data"]):
assert "embedding" in embedding_data
assert "index" in embedding_data
assert embedding_data["index"] == i
deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
assert deleted_output_file.deleted
if final_batch.error_file_id is not None:
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
assert deleted_error_file.deleted