Revert "feat: add batches API with OpenAI compatibility" (#3149)

Reverts llamastack/llama-stack#3088

The PR broke integration tests.
Ashwin Bharambe 2025-08-14 10:08:54 -07:00 committed by GitHub
parent de692162af
commit ee7631b6cf
26 changed files with 2 additions and 2707 deletions

@@ -1,5 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

@@ -1,122 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""Shared pytest fixtures for batch tests."""
import json
import time
import warnings
from contextlib import contextmanager
from io import BytesIO
import pytest
from llama_stack.apis.files import OpenAIFilePurpose
class BatchHelper:
"""Helper class for creating and managing batch input files."""
def __init__(self, client):
"""Initialize with either a batch_client or openai_client."""
self.client = client
@contextmanager
def create_file(self, content: str | list[dict], filename_prefix="batch_input"):
"""Context manager for creating and cleaning up batch input files.
Args:
content: Either a list of batch request dictionaries or raw string content
filename_prefix: Prefix for the generated filename (or full filename if content is string)
Yields:
The uploaded file object
"""
if isinstance(content, str):
# Handle raw string content (e.g., malformed JSONL, empty files)
file_content = content.encode("utf-8")
else:
# Handle list of batch request dictionaries
jsonl_content = "\n".join(json.dumps(req) for req in content)
file_content = jsonl_content.encode("utf-8")
filename = filename_prefix if filename_prefix.endswith(".jsonl") else f"{filename_prefix}.jsonl"
with BytesIO(file_content) as file_buffer:
file_buffer.name = filename
uploaded_file = self.client.files.create(file=file_buffer, purpose=OpenAIFilePurpose.BATCH)
try:
yield uploaded_file
finally:
try:
self.client.files.delete(uploaded_file.id)
except Exception:
warnings.warn(
f"Failed to cleanup file {uploaded_file.id}: {uploaded_file.filename}",
stacklevel=2,
)
def wait_for(
self,
batch_id: str,
max_wait_time: int = 60,
sleep_interval: int | None = None,
expected_statuses: set[str] | None = None,
timeout_action: str = "fail",
):
"""Wait for a batch to reach a terminal status.
Args:
batch_id: The batch ID to monitor
max_wait_time: Maximum time to wait in seconds (default: 60 seconds)
sleep_interval: Time to sleep between checks in seconds (default: 1/10th of max_wait_time, min 1s, max 15s)
expected_statuses: Set of expected terminal statuses (default: {"completed"})
timeout_action: Action on timeout - "fail" (pytest.fail) or "skip" (pytest.skip)
Returns:
The final batch object
Raises:
pytest.Failed: If batch reaches an unexpected status or timeout_action is "fail"
pytest.Skipped: If timeout_action is "skip" on timeout or unexpected status
"""
if sleep_interval is None:
# Default to 1/10th of max_wait_time, with min 1s and max 15s
sleep_interval = max(1, min(15, max_wait_time // 10))
if expected_statuses is None:
expected_statuses = {"completed"}
terminal_statuses = {"completed", "failed", "cancelled", "expired"}
unexpected_statuses = terminal_statuses - expected_statuses
start_time = time.time()
while time.time() - start_time < max_wait_time:
current_batch = self.client.batches.retrieve(batch_id)
if current_batch.status in expected_statuses:
return current_batch
elif current_batch.status in unexpected_statuses:
error_msg = f"Batch reached unexpected status: {current_batch.status}"
if timeout_action == "skip":
pytest.skip(error_msg)
else:
pytest.fail(error_msg)
time.sleep(sleep_interval)
timeout_msg = f"Batch did not reach expected status {expected_statuses} within {max_wait_time} seconds"
if timeout_action == "skip":
pytest.skip(timeout_msg)
else:
pytest.fail(timeout_msg)
@pytest.fixture
def batch_helper(openai_client):
"""Fixture that provides a BatchHelper instance for OpenAI client."""
return BatchHelper(openai_client)
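
For orientation, here is a minimal sketch of how a test combines these fixtures, assuming the suite's standard openai_client and text_model_id fixtures; the test name and request payload are illustrative, not part of the reverted files:

    def test_batch_helper_usage_sketch(openai_client, batch_helper, text_model_id):
        """Hypothetical example: upload one request, create a batch, wait for it."""
        requests = [
            {
                "custom_id": "demo-1",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {"model": text_model_id, "messages": [{"role": "user", "content": "Hi"}]},
            }
        ]
        with batch_helper.create_file(requests) as uploaded_file:  # input file is deleted on exit
            batch = openai_client.batches.create(
                input_file_id=uploaded_file.id,
                endpoint="/v1/chat/completions",
                completion_window="24h",
            )
            final = batch_helper.wait_for(batch.id, max_wait_time=120, timeout_action="skip")
            assert final.status == "completed"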

@@ -1,270 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Integration tests for the Llama Stack batch processing functionality.
This module contains comprehensive integration tests for the batch processing API,
using the OpenAI-compatible client interface for consistency.
Test Categories:
1. Core Batch Operations:
- test_batch_creation_and_retrieval: Comprehensive batch creation, structure validation, and retrieval
- test_batch_listing: Basic batch listing functionality
- test_batch_immediate_cancellation: Batch cancellation workflow
# TODO: cancel during processing
2. End-to-End Processing:
- test_batch_e2e_chat_completions: Full chat completions workflow with output and error validation
Note: Error conditions and edge cases are primarily tested in test_batches_errors.py
for better organization and separation of concerns.
CLEANUP WARNING: These tests currently create batches that are not automatically
cleaned up after test completion. This may lead to resource accumulation over
multiple test runs. Only test_batch_immediate_cancellation properly cancels its batch.
The test_batch_e2e_chat_completions test does clean up its output and error files.
"""
import json
class TestBatchesIntegration:
"""Integration tests for the batches API."""
def test_batch_creation_and_retrieval(self, openai_client, batch_helper, text_model_id):
"""Test comprehensive batch creation and retrieval scenarios."""
test_metadata = {
"test_type": "comprehensive",
"purpose": "creation_and_retrieval_test",
"version": "1.0",
"tags": "test,batch",
}
batch_requests = [
{
"custom_id": "request-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests, "batch_creation_test") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
metadata=test_metadata,
)
assert batch.endpoint == "/v1/chat/completions"
assert batch.input_file_id == uploaded_file.id
assert batch.completion_window == "24h"
assert batch.metadata == test_metadata
retrieved_batch = openai_client.batches.retrieve(batch.id)
assert retrieved_batch.id == batch.id
assert retrieved_batch.object == batch.object
assert retrieved_batch.endpoint == batch.endpoint
assert retrieved_batch.input_file_id == batch.input_file_id
assert retrieved_batch.completion_window == batch.completion_window
assert retrieved_batch.metadata == batch.metadata
def test_batch_listing(self, openai_client, batch_helper, text_model_id):
"""
Test batch listing.
This test creates multiple batches and verifies that they can be listed.
It also deletes the input files before execution, which means the batches
will appear as failed due to missing input files. This is expected and
a good thing, because it means no inference is performed.
"""
batch_ids = []
for i in range(2):
batch_requests = [
{
"custom_id": f"request-{i}",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": f"Hello {i}"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests, f"batch_input_{i}") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
batch_ids.append(batch.id)
batch_list = openai_client.batches.list()
assert isinstance(batch_list.data, list)
listed_batch_ids = {b.id for b in batch_list.data}
for batch_id in batch_ids:
assert batch_id in listed_batch_ids
def test_batch_immediate_cancellation(self, openai_client, batch_helper, text_model_id):
"""Test immediate batch cancellation."""
batch_requests = [
{
"custom_id": "request-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
# hopefully cancel the batch before it completes
cancelling_batch = openai_client.batches.cancel(batch.id)
assert cancelling_batch.status in ["cancelling", "cancelled"]
assert isinstance(cancelling_batch.cancelling_at, int), (
f"cancelling_at should be int, got {type(cancelling_batch.cancelling_at)}"
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60, # often takes 10-11 minutes, give it 3 min
expected_statuses={"cancelled"},
timeout_action="skip",
)
assert final_batch.status == "cancelled"
assert isinstance(final_batch.cancelled_at, int), (
f"cancelled_at should be int, got {type(final_batch.cancelled_at)}"
)
def test_batch_e2e_chat_completions(self, openai_client, batch_helper, text_model_id):
"""Test end-to-end batch processing for chat completions with both successful and failed operations."""
batch_requests = [
{
"custom_id": "success-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Say hello"}],
"max_tokens": 20,
},
},
{
"custom_id": "error-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "This should fail"}],
"max_tokens": -1, # Invalid negative max_tokens will cause inference error
},
},
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
metadata={"test": "e2e_success_and_errors_test"},
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60, # often takes 2-3 minutes
expected_statuses={"completed"},
timeout_action="skip",
)
# Expecting a completed batch with both successful and failed requests
# Batch(id='batch_xxx',
# completion_window='24h',
# created_at=...,
# endpoint='/v1/chat/completions',
# input_file_id='file-xxx',
# object='batch',
# status='completed',
# output_file_id='file-xxx',
# error_file_id='file-xxx',
# request_counts=BatchRequestCounts(completed=1, failed=1, total=2))
assert final_batch.status == "completed"
assert final_batch.request_counts is not None
assert final_batch.request_counts.total == 2
assert final_batch.request_counts.completed == 1
assert final_batch.request_counts.failed == 1
assert final_batch.output_file_id is not None, "Output file should exist for successful requests"
output_content = openai_client.files.content(final_batch.output_file_id)
if isinstance(output_content, str):
output_text = output_content
else:
output_text = output_content.content.decode("utf-8")
output_lines = output_text.strip().split("\n")
for line in output_lines:
result = json.loads(line)
assert "id" in result
assert "custom_id" in result
assert result["custom_id"] == "success-1"
assert "response" in result
assert result["response"]["status_code"] == 200
assert "body" in result["response"]
assert "choices" in result["response"]["body"]
assert final_batch.error_file_id is not None, "Error file should exist for failed requests"
error_content = openai_client.files.content(final_batch.error_file_id)
if isinstance(error_content, str):
error_text = error_content
else:
error_text = error_content.content.decode("utf-8")
error_lines = error_text.strip().split("\n")
for line in error_lines:
result = json.loads(line)
assert "id" in result
assert "custom_id" in result
assert result["custom_id"] == "error-1"
assert "error" in result
error = result["error"]
assert error is not None
assert "code" in error or "message" in error, "Error should have code or message"
deleted_output_file = openai_client.files.delete(final_batch.output_file_id)
assert deleted_output_file.deleted, f"Output file {final_batch.output_file_id} was not deleted successfully"
deleted_error_file = openai_client.files.delete(final_batch.error_file_id)
assert deleted_error_file.deleted, f"Error file {final_batch.error_file_id} was not deleted successfully"
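
As a point of reference, each line of the output and error files parsed by the e2e test above is a standalone JSON object. Based only on the fields the test asserts, a successful line looks roughly like the following; the IDs and the choices payload are placeholders:

    # Hypothetical shape of one output-file line:
    # {"id": "batch_req_abc", "custom_id": "success-1",
    #  "response": {"status_code": 200, "body": {"choices": [...]}}}

An error-file line instead carries an "error" object with at least a code or a message, alongside its own "id" and "custom_id".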

@@ -1,693 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Error handling and edge case tests for the Llama Stack batch processing functionality.
This module focuses exclusively on testing error conditions, validation failures,
and edge cases for batch operations to ensure robust error handling and graceful
degradation.
Test Categories:
1. File and Input Validation:
- test_batch_nonexistent_file_id: Handling invalid file IDs
- test_batch_malformed_jsonl: Processing malformed JSONL input files
- test_file_malformed_batch_file: Handling malformed files at upload time
- test_batch_missing_required_fields: Validation of required request fields
2. API Endpoint and Model Validation:
- test_batch_invalid_endpoint: Invalid endpoint handling during creation
- test_batch_error_handling_invalid_model: Error handling with nonexistent models
- test_batch_endpoint_mismatch: Validation of endpoint/URL consistency
3. Batch Lifecycle Error Handling:
- test_batch_retrieve_nonexistent: Retrieving non-existent batches
- test_batch_cancel_nonexistent: Cancelling non-existent batches
- test_batch_cancel_completed: Attempting to cancel completed batches
4. Parameter and Configuration Validation:
- test_batch_invalid_completion_window: Invalid completion window values
- test_batch_invalid_metadata_types: Invalid metadata type validation
- test_batch_missing_required_body_fields: Validation of required fields in request body
5. Feature Restriction and Compatibility:
- test_batch_streaming_not_supported: Streaming request rejection
- test_batch_mixed_streaming_requests: Mixed streaming/non-streaming validation
Note: Core functionality and OpenAI compatibility tests are located in
test_batches_integration.py for better organization and separation of concerns.
CLEANUP WARNING: These tests create batches to test error conditions but do not
automatically clean them up after test completion. While most error tests create
batches that fail quickly, some may create valid batches that consume resources.
"""
import pytest
from openai import BadRequestError, ConflictError, NotFoundError
class TestBatchesErrorHandling:
"""Error handling and edge case tests for the batches API using OpenAI client."""
def test_batch_nonexistent_file_id(self, openai_client, batch_helper):
"""Test batch creation with nonexistent input file ID."""
batch = openai_client.batches.create(
input_file_id="file-nonexistent-xyz",
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(
# code='invalid_request',
# line=None,
# message='Cannot find file ..., or organization ... does not have access to it.',
# param='file_id')
# ], object='list'),
# failed_at=1754566971,
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 1
error = final_batch.errors.data[0]
assert error.code == "invalid_request"
assert "cannot find file" in error.message.lower()
def test_batch_invalid_endpoint(self, openai_client, batch_helper, text_model_id):
"""Test batch creation with invalid endpoint."""
batch_requests = [
{
"custom_id": "invalid-endpoint",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
with pytest.raises(BadRequestError) as exc_info:
openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/invalid/endpoint",
completion_window="24h",
)
# Expected -
# Error code: 400 - {
# 'error': {
# 'message': "Invalid value: '/v1/invalid/endpoint'. Supported values are: '/v1/chat/completions', '/v1/completions', '/v1/embeddings', and '/v1/responses'.",
# 'type': 'invalid_request_error',
# 'param': 'endpoint',
# 'code': 'invalid_value'
# }
# }
error_msg = str(exc_info.value).lower()
assert exc_info.value.status_code == 400
assert "invalid value" in error_msg
assert "/v1/invalid/endpoint" in error_msg
assert "supported values" in error_msg
assert "endpoint" in error_msg
assert "invalid_value" in error_msg
def test_batch_malformed_jsonl(self, openai_client, batch_helper):
"""
Test batch with malformed JSONL input.
The /v1/files endpoint requires valid JSONL format, so we provide a well formed line
before a malformed line to ensure we get to the /v1/batches validation stage.
"""
with batch_helper.create_file(
"""{"custom_id": "valid", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "test"}}
{invalid json here""",
"malformed_batch_input.jsonl",
) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# ...,
# BatchError(code='invalid_json_line',
# line=2,
# message='This line is not parseable as valid JSON.',
# param=None)
# ], object='list'),
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) > 0
error = final_batch.errors.data[-1] # get last error because first may be about the "test" model
assert error.code == "invalid_json_line"
assert error.line == 2
assert "not" in error.message.lower()
assert "valid json" in error.message.lower()
@pytest.mark.xfail(reason="Not all file providers validate content")
@pytest.mark.parametrize("batch_requests", ["", "{malformed json"], ids=["empty", "malformed"])
def test_file_malformed_batch_file(self, openai_client, batch_helper, batch_requests):
"""Test file upload with malformed content."""
with pytest.raises(BadRequestError) as exc_info:
with batch_helper.create_file(batch_requests, "malformed_batch_input_file.jsonl"):
# /v1/files rejects the file, we don't get to batch creation
pass
error_msg = str(exc_info.value).lower()
assert exc_info.value.status_code == 400
assert "invalid file format" in error_msg
assert "jsonl" in error_msg
def test_batch_retrieve_nonexistent(self, openai_client):
"""Test retrieving nonexistent batch."""
with pytest.raises(NotFoundError) as exc_info:
openai_client.batches.retrieve("batch-nonexistent-xyz")
error_msg = str(exc_info.value).lower()
assert exc_info.value.status_code == 404
assert "no batch found" in error_msg or "not found" in error_msg
def test_batch_cancel_nonexistent(self, openai_client):
"""Test cancelling nonexistent batch."""
with pytest.raises(NotFoundError) as exc_info:
openai_client.batches.cancel("batch-nonexistent-xyz")
error_msg = str(exc_info.value).lower()
assert exc_info.value.status_code == 404
assert "no batch found" in error_msg or "not found" in error_msg
def test_batch_cancel_completed(self, openai_client, batch_helper, text_model_id):
"""Test cancelling already completed batch."""
batch_requests = [
{
"custom_id": "cancel-completed",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Quick test"}],
"max_tokens": 5,
},
}
]
with batch_helper.create_file(batch_requests, "cancel_test_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(
batch.id,
max_wait_time=3 * 60, # often take 10-11 min, give it 3 min
expected_statuses={"completed"},
timeout_action="skip",
)
deleted_file = openai_client.files.delete(final_batch.output_file_id)
assert deleted_file.deleted, f"File {final_batch.output_file_id} was not deleted successfully"
with pytest.raises(ConflictError) as exc_info:
openai_client.batches.cancel(batch.id)
# Expecting -
# Error code: 409 - {
# 'error': {
# 'message': "Cannot cancel a batch with status 'completed'.",
# 'type': 'invalid_request_error',
# 'param': None,
# 'code': None
# }
# }
#
# NOTE: Same for "failed", cancelling "cancelled" batches is allowed
error_msg = str(exc_info.value).lower()
assert exc_info.value.status_code == 409
assert "cannot cancel" in error_msg
def test_batch_missing_required_fields(self, openai_client, batch_helper, text_model_id):
"""Test batch with requests missing required fields."""
batch_requests = [
{
# Missing custom_id
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "No custom_id"}],
"max_tokens": 10,
},
},
{
"custom_id": "no-method",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "No method"}],
"max_tokens": 10,
},
},
{
"custom_id": "no-url",
"method": "POST",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "No URL"}],
"max_tokens": 10,
},
},
{
"custom_id": "no-body",
"method": "POST",
"url": "/v1/chat/completions",
},
]
with batch_helper.create_file(batch_requests, "missing_fields_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(
# data=[
# BatchError(
# code='missing_required_parameter',
# line=1,
# message="Missing required parameter: 'custom_id'.",
# param='custom_id'
# ),
# BatchError(
# code='missing_required_parameter',
# line=2,
# message="Missing required parameter: 'method'.",
# param='method'
# ),
# BatchError(
# code='missing_required_parameter',
# line=3,
# message="Missing required parameter: 'url'.",
# param='url'
# ),
# BatchError(
# code='missing_required_parameter',
# line=4,
# message="Missing required parameter: 'body'.",
# param='body'
# )
# ], object='list'),
# failed_at=1754566945,
# ...)
# )
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 4
no_custom_id_error = final_batch.errors.data[0]
assert no_custom_id_error.code == "missing_required_parameter"
assert no_custom_id_error.line == 1
assert "missing" in no_custom_id_error.message.lower()
assert "custom_id" in no_custom_id_error.message.lower()
no_method_error = final_batch.errors.data[1]
assert no_method_error.code == "missing_required_parameter"
assert no_method_error.line == 2
assert "missing" in no_method_error.message.lower()
assert "method" in no_method_error.message.lower()
no_url_error = final_batch.errors.data[2]
assert no_url_error.code == "missing_required_parameter"
assert no_url_error.line == 3
assert "missing" in no_url_error.message.lower()
assert "url" in no_url_error.message.lower()
no_body_error = final_batch.errors.data[3]
assert no_body_error.code == "missing_required_parameter"
assert no_body_error.line == 4
assert "missing" in no_body_error.message.lower()
assert "body" in no_body_error.message.lower()
def test_batch_invalid_completion_window(self, openai_client, batch_helper, text_model_id):
"""Test batch creation with invalid completion window."""
batch_requests = [
{
"custom_id": "invalid-completion-window",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
for window in ["1h", "48h", "invalid", ""]:
with pytest.raises(BadRequestError) as exc_info:
openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window=window,
)
assert exc_info.value.status_code == 400
error_msg = str(exc_info.value).lower()
assert "error" in error_msg
assert "completion_window" in error_msg
def test_batch_streaming_not_supported(self, openai_client, batch_helper, text_model_id):
"""Test that streaming responses are not supported in batches."""
batch_requests = [
{
"custom_id": "streaming-test",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
"stream": True, # Not supported
},
}
]
with batch_helper.create_file(batch_requests, "streaming_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(code='streaming_unsupported',
# line=1,
# message='Chat Completions: Streaming is not supported in the Batch API.',
# param='body.stream')
# ], object='list'),
# failed_at=1754566965,
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 1
error = final_batch.errors.data[0]
assert error.code == "streaming_unsupported"
assert error.line == 1
assert "streaming" in error.message.lower()
assert "not supported" in error.message.lower()
assert error.param == "body.stream"
assert final_batch.failed_at is not None
def test_batch_mixed_streaming_requests(self, openai_client, batch_helper, text_model_id):
"""
Test batch with mixed streaming and non-streaming requests.
This is distinct from test_batch_streaming_not_supported, which tests a single
streaming request, to ensure an otherwise valid batch fails when a single
streaming request is included.
"""
batch_requests = [
{
"custom_id": "valid-non-streaming-request",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello without streaming"}],
"max_tokens": 10,
},
},
{
"custom_id": "streaming-request",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello with streaming"}],
"max_tokens": 10,
"stream": True, # Not supported
},
},
]
with batch_helper.create_file(batch_requests, "mixed_streaming_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(
# code='streaming_unsupported',
# line=2,
# message='Chat Completions: Streaming is not supported in the Batch API.',
# param='body.stream')
# ], object='list'),
# failed_at=1754574442,
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 1
error = final_batch.errors.data[0]
assert error.code == "streaming_unsupported"
assert error.line == 2
assert "streaming" in error.message.lower()
assert "not supported" in error.message.lower()
assert error.param == "body.stream"
assert final_batch.failed_at is not None
def test_batch_endpoint_mismatch(self, openai_client, batch_helper, text_model_id):
"""Test batch creation with mismatched endpoint and request URL."""
batch_requests = [
{
"custom_id": "endpoint-mismatch",
"method": "POST",
"url": "/v1/embeddings", # Different from batch endpoint
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
},
}
]
with batch_helper.create_file(batch_requests, "endpoint_mismatch_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions", # Different from request URL
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(
# code='invalid_url',
# line=1,
# message='The URL provided for this request does not match the batch endpoint.',
# param='url')
# ], object='list'),
# failed_at=1754566972,
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 1
error = final_batch.errors.data[0]
assert error.line == 1
assert error.code == "invalid_url"
assert "does not match" in error.message.lower()
assert "endpoint" in error.message.lower()
assert final_batch.failed_at is not None
def test_batch_error_handling_invalid_model(self, openai_client, batch_helper):
"""Test batch error handling with invalid model."""
batch_requests = [
{
"custom_id": "invalid-model",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": "nonexistent-model-xyz",
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(code='model_not_found',
# line=1,
# message="The provided model 'nonexistent-model-xyz' is not supported by the Batch API.",
# param='body.model')
# ], object='list'),
# failed_at=1754566978,
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 1
error = final_batch.errors.data[0]
assert error.line == 1
assert error.code == "model_not_found"
assert "not supported" in error.message.lower()
assert error.param == "body.model"
assert final_batch.failed_at is not None
def test_batch_missing_required_body_fields(self, openai_client, batch_helper, text_model_id):
"""Test batch with requests missing required fields in body (model and messages)."""
batch_requests = [
{
"custom_id": "missing-model",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
# Missing model field
"messages": [{"role": "user", "content": "Hello without model"}],
"max_tokens": 10,
},
},
{
"custom_id": "missing-messages",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
# Missing messages field
"max_tokens": 10,
},
},
]
with batch_helper.create_file(batch_requests, "missing_body_fields_batch_input") as uploaded_file:
batch = openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
)
final_batch = batch_helper.wait_for(batch.id, expected_statuses={"failed"})
# Expecting -
# Batch(...,
# status='failed',
# errors=Errors(data=[
# BatchError(
# code='invalid_request',
# line=1,
# message='Model parameter is required.',
# param='body.model'),
# BatchError(
# code='invalid_request',
# line=2,
# message='Messages parameter is required.',
# param='body.messages')
# ], object='list'),
# ...)
assert final_batch.status == "failed"
assert final_batch.errors is not None
assert len(final_batch.errors.data) == 2
model_error = final_batch.errors.data[0]
assert model_error.line == 1
assert "model" in model_error.message.lower()
assert model_error.param == "body.model"
messages_error = final_batch.errors.data[1]
assert messages_error.line == 2
assert "messages" in messages_error.message.lower()
assert messages_error.param == "body.messages"
assert final_batch.failed_at is not None
def test_batch_invalid_metadata_types(self, openai_client, batch_helper, text_model_id):
"""Test batch creation with invalid metadata types (like lists)."""
batch_requests = [
{
"custom_id": "invalid-metadata-type",
"method": "POST",
"url": "/v1/chat/completions",
"body": {
"model": text_model_id,
"messages": [{"role": "user", "content": "Hello"}],
"max_tokens": 10,
},
}
]
with batch_helper.create_file(batch_requests) as uploaded_file:
with pytest.raises(Exception) as exc_info:
openai_client.batches.create(
input_file_id=uploaded_file.id,
endpoint="/v1/chat/completions",
completion_window="24h",
metadata={
"tags": ["tag1", "tag2"], # Invalid type, should be a string
},
)
# Expecting -
# Error code: 400 - {'error':
# {'message': "Invalid type for 'metadata.tags': expected a string,
# but got an array instead.",
# 'type': 'invalid_request_error', 'param': 'metadata.tags',
# 'code': 'invalid_type'}}
error_msg = str(exc_info.value).lower()
assert "400" in error_msg
assert "tags" in error_msg
assert "string" in error_msg

@@ -1,753 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""
Test suite for the reference implementation of the Batches API.
The tests are categorized and outlined below, keep this updated:
- Batch creation with various parameters and validation:
* test_create_and_retrieve_batch_success (positive)
* test_create_batch_without_metadata (positive)
* test_create_batch_completion_window (negative)
* test_create_batch_invalid_endpoints (negative)
* test_create_batch_invalid_metadata (negative)
- Batch retrieval and error handling for non-existent batches:
* test_retrieve_batch_not_found (negative)
- Batch cancellation with proper status transitions:
* test_cancel_batch_success (positive)
* test_cancel_batch_invalid_statuses (negative)
* test_cancel_batch_not_found (negative)
- Batch listing with pagination and filtering:
* test_list_batches_empty (positive)
* test_list_batches_single_batch (positive)
* test_list_batches_multiple_batches (positive)
* test_list_batches_with_limit (positive)
* test_list_batches_with_pagination (positive)
* test_list_batches_invalid_after (negative)
- Data persistence in the underlying key-value store:
* test_kvstore_persistence (positive)
- Batch processing concurrency control:
* test_max_concurrent_batches (positive)
- Input validation testing (direct _validate_input method tests):
* test_validate_input_file_not_found (negative)
* test_validate_input_file_exists_empty_content (positive)
* test_validate_input_file_mixed_valid_invalid_json (mixed)
* test_validate_input_invalid_model (negative)
* test_validate_input_url_mismatch (negative)
* test_validate_input_multiple_errors_per_request (negative)
* test_validate_input_invalid_request_format (negative)
* test_validate_input_missing_parameters (parametrized negative - custom_id, method, url, body, model, messages missing validation)
* test_validate_input_invalid_parameter_types (parametrized negative - custom_id, url, method, body, model, messages type validation)
The tests use temporary SQLite databases for isolation and mock external
dependencies like inference, files, and models APIs.
"""
import json
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack.apis.batches import BatchObject
from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack.providers.inline.batches.reference.batches import ReferenceBatchesImpl
from llama_stack.providers.inline.batches.reference.config import ReferenceBatchesImplConfig
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
class TestReferenceBatchesImpl:
"""Test the reference implementation of the Batches API."""
@pytest.fixture
async def provider(self):
"""Create a test provider instance with temporary database."""
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "test_batches.db"
kvstore_config = SqliteKVStoreConfig(db_path=str(db_path))
config = ReferenceBatchesImplConfig(kvstore=kvstore_config)
# Create kvstore and mock APIs
from unittest.mock import AsyncMock
from llama_stack.providers.utils.kvstore import kvstore_impl
kvstore = await kvstore_impl(config.kvstore)
mock_inference = AsyncMock()
mock_files = AsyncMock()
mock_models = AsyncMock()
provider = ReferenceBatchesImpl(config, mock_inference, mock_files, mock_models, kvstore)
await provider.initialize()
# unit tests should not require background processing
provider.process_batches = False
yield provider
await provider.shutdown()
@pytest.fixture
def sample_batch_data(self):
"""Sample batch data for testing."""
return {
"input_file_id": "file_abc123",
"endpoint": "/v1/chat/completions",
"completion_window": "24h",
"metadata": {"test": "true", "priority": "high"},
}
def _validate_batch_type(self, batch, expected_metadata=None):
"""
Helper function to validate batch object structure and field types.
Note: This validates the direct BatchObject from the provider, not the
client library response which has a different structure.
Args:
batch: The BatchObject instance to validate.
expected_metadata: Optional expected metadata dictionary to validate against.
"""
assert isinstance(batch.id, str)
assert isinstance(batch.completion_window, str)
assert isinstance(batch.created_at, int)
assert isinstance(batch.endpoint, str)
assert isinstance(batch.input_file_id, str)
assert batch.object == "batch"
assert batch.status in [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled",
]
if expected_metadata is not None:
assert batch.metadata == expected_metadata
timestamp_fields = [
"cancelled_at",
"cancelling_at",
"completed_at",
"expired_at",
"expires_at",
"failed_at",
"finalizing_at",
"in_progress_at",
]
for field in timestamp_fields:
field_value = getattr(batch, field, None)
if field_value is not None:
assert isinstance(field_value, int), f"{field} should be int or None, got {type(field_value)}"
file_id_fields = ["error_file_id", "output_file_id"]
for field in file_id_fields:
field_value = getattr(batch, field, None)
if field_value is not None:
assert isinstance(field_value, str), f"{field} should be str or None, got {type(field_value)}"
if hasattr(batch, "request_counts") and batch.request_counts is not None:
assert isinstance(batch.request_counts.completed, int), (
f"request_counts.completed should be int, got {type(batch.request_counts.completed)}"
)
assert isinstance(batch.request_counts.failed, int), (
f"request_counts.failed should be int, got {type(batch.request_counts.failed)}"
)
assert isinstance(batch.request_counts.total, int), (
f"request_counts.total should be int, got {type(batch.request_counts.total)}"
)
if hasattr(batch, "errors") and batch.errors is not None:
assert isinstance(batch.errors, dict), f"errors should be object or dict, got {type(batch.errors)}"
if hasattr(batch.errors, "data") and batch.errors.data is not None:
assert isinstance(batch.errors.data, list), (
f"errors.data should be list or None, got {type(batch.errors.data)}"
)
for i, error_item in enumerate(batch.errors.data):
assert isinstance(error_item, dict), (
f"errors.data[{i}] should be object or dict, got {type(error_item)}"
)
if hasattr(error_item, "code") and error_item.code is not None:
assert isinstance(error_item.code, str), (
f"errors.data[{i}].code should be str or None, got {type(error_item.code)}"
)
if hasattr(error_item, "line") and error_item.line is not None:
assert isinstance(error_item.line, int), (
f"errors.data[{i}].line should be int or None, got {type(error_item.line)}"
)
if hasattr(error_item, "message") and error_item.message is not None:
assert isinstance(error_item.message, str), (
f"errors.data[{i}].message should be str or None, got {type(error_item.message)}"
)
if hasattr(error_item, "param") and error_item.param is not None:
assert isinstance(error_item.param, str), (
f"errors.data[{i}].param should be str or None, got {type(error_item.param)}"
)
if hasattr(batch.errors, "object") and batch.errors.object is not None:
assert isinstance(batch.errors.object, str), (
f"errors.object should be str or None, got {type(batch.errors.object)}"
)
assert batch.errors.object == "list", f"errors.object should be 'list', got {batch.errors.object}"
async def test_create_and_retrieve_batch_success(self, provider, sample_batch_data):
"""Test successful batch creation and retrieval."""
created_batch = await provider.create_batch(**sample_batch_data)
self._validate_batch_type(created_batch, expected_metadata=sample_batch_data["metadata"])
assert created_batch.id.startswith("batch_")
assert len(created_batch.id) > 13
assert created_batch.object == "batch"
assert created_batch.endpoint == sample_batch_data["endpoint"]
assert created_batch.input_file_id == sample_batch_data["input_file_id"]
assert created_batch.completion_window == sample_batch_data["completion_window"]
assert created_batch.status == "validating"
assert created_batch.metadata == sample_batch_data["metadata"]
assert isinstance(created_batch.created_at, int)
assert created_batch.created_at > 0
retrieved_batch = await provider.retrieve_batch(created_batch.id)
self._validate_batch_type(retrieved_batch, expected_metadata=sample_batch_data["metadata"])
assert retrieved_batch.id == created_batch.id
assert retrieved_batch.input_file_id == created_batch.input_file_id
assert retrieved_batch.endpoint == created_batch.endpoint
assert retrieved_batch.status == created_batch.status
assert retrieved_batch.metadata == created_batch.metadata
async def test_create_batch_without_metadata(self, provider):
"""Test batch creation without optional metadata."""
batch = await provider.create_batch(
input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="24h"
)
assert batch.metadata is None
async def test_create_batch_completion_window(self, provider):
"""Test batch creation with invalid completion window."""
with pytest.raises(ValueError, match="Invalid completion_window"):
await provider.create_batch(
input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="now"
)
@pytest.mark.parametrize(
"endpoint",
[
"/v1/embeddings",
"/v1/completions",
"/v1/invalid/endpoint",
"",
],
)
async def test_create_batch_invalid_endpoints(self, provider, endpoint):
"""Test batch creation with various invalid endpoints."""
with pytest.raises(ValueError, match="Invalid endpoint"):
await provider.create_batch(input_file_id="file_123", endpoint=endpoint, completion_window="24h")
async def test_create_batch_invalid_metadata(self, provider):
"""Test that batch creation fails with invalid metadata."""
with pytest.raises(ValueError, match="should be a valid string"):
await provider.create_batch(
input_file_id="file_123",
endpoint="/v1/chat/completions",
completion_window="24h",
metadata={123: "invalid_key"}, # Non-string key
)
with pytest.raises(ValueError, match="should be a valid string"):
await provider.create_batch(
input_file_id="file_123",
endpoint="/v1/chat/completions",
completion_window="24h",
metadata={"valid_key": 456}, # Non-string value
)
async def test_retrieve_batch_not_found(self, provider):
"""Test error when retrieving non-existent batch."""
with pytest.raises(ResourceNotFoundError, match=r"Batch 'nonexistent_batch' not found"):
await provider.retrieve_batch("nonexistent_batch")
async def test_cancel_batch_success(self, provider, sample_batch_data):
"""Test successful batch cancellation."""
created_batch = await provider.create_batch(**sample_batch_data)
assert created_batch.status == "validating"
cancelled_batch = await provider.cancel_batch(created_batch.id)
assert cancelled_batch.id == created_batch.id
assert cancelled_batch.status in ["cancelling", "cancelled"]
assert isinstance(cancelled_batch.cancelling_at, int)
assert cancelled_batch.cancelling_at >= created_batch.created_at
@pytest.mark.parametrize("status", ["failed", "expired", "completed"])
async def test_cancel_batch_invalid_statuses(self, provider, sample_batch_data, status):
"""Test error when cancelling batch in final states."""
provider.process_batches = False
created_batch = await provider.create_batch(**sample_batch_data)
# directly update status in kvstore
await provider._update_batch(created_batch.id, status=status)
with pytest.raises(ConflictError, match=f"Cannot cancel batch '{created_batch.id}' with status '{status}'"):
await provider.cancel_batch(created_batch.id)
async def test_cancel_batch_not_found(self, provider):
"""Test error when cancelling non-existent batch."""
with pytest.raises(ResourceNotFoundError, match=r"Batch 'nonexistent_batch' not found"):
await provider.cancel_batch("nonexistent_batch")
async def test_list_batches_empty(self, provider):
"""Test listing batches when none exist."""
response = await provider.list_batches()
assert response.object == "list"
assert response.data == []
assert response.first_id is None
assert response.last_id is None
assert response.has_more is False
async def test_list_batches_single_batch(self, provider, sample_batch_data):
"""Test listing batches with single batch."""
created_batch = await provider.create_batch(**sample_batch_data)
response = await provider.list_batches()
assert len(response.data) == 1
self._validate_batch_type(response.data[0], expected_metadata=sample_batch_data["metadata"])
assert response.data[0].id == created_batch.id
assert response.first_id == created_batch.id
assert response.last_id == created_batch.id
assert response.has_more is False
async def test_list_batches_multiple_batches(self, provider):
"""Test listing multiple batches."""
batches = [
await provider.create_batch(
input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
)
for i in range(3)
]
response = await provider.list_batches()
assert len(response.data) == 3
batch_ids = {batch.id for batch in response.data}
expected_ids = {batch.id for batch in batches}
assert batch_ids == expected_ids
assert response.has_more is False
assert response.first_id in expected_ids
assert response.last_id in expected_ids
async def test_list_batches_with_limit(self, provider):
"""Test listing batches with limit parameter."""
batches = [
await provider.create_batch(
input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
)
for i in range(3)
]
response = await provider.list_batches(limit=2)
assert len(response.data) == 2
assert response.has_more is True
assert response.first_id == response.data[0].id
assert response.last_id == response.data[1].id
batch_ids = {batch.id for batch in response.data}
expected_ids = {batch.id for batch in batches}
assert batch_ids.issubset(expected_ids)
async def test_list_batches_with_pagination(self, provider):
"""Test listing batches with pagination using 'after' parameter."""
for i in range(3):
await provider.create_batch(
input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
)
# Get first page
first_page = await provider.list_batches(limit=1)
assert len(first_page.data) == 1
assert first_page.has_more is True
# Get second page using 'after'
second_page = await provider.list_batches(limit=1, after=first_page.data[0].id)
assert len(second_page.data) == 1
assert second_page.data[0].id != first_page.data[0].id
# Verify we got the next batch in order
all_batches = await provider.list_batches()
expected_second_batch_id = all_batches.data[1].id
assert second_page.data[0].id == expected_second_batch_id
async def test_list_batches_invalid_after(self, provider, sample_batch_data):
"""Test listing batches with invalid 'after' parameter."""
await provider.create_batch(**sample_batch_data)
response = await provider.list_batches(after="nonexistent_batch")
# Should return all batches (no filtering when 'after' batch not found)
assert len(response.data) == 1
async def test_kvstore_persistence(self, provider, sample_batch_data):
"""Test that batches are properly persisted in kvstore."""
batch = await provider.create_batch(**sample_batch_data)
stored_data = await provider.kvstore.get(f"batch:{batch.id}")
assert stored_data is not None
stored_batch_dict = json.loads(stored_data)
assert stored_batch_dict["id"] == batch.id
assert stored_batch_dict["input_file_id"] == sample_batch_data["input_file_id"]
async def test_validate_input_file_not_found(self, provider):
"""Test _validate_input when input file does not exist."""
provider.files_api.openai_retrieve_file = AsyncMock(side_effect=Exception("File not found"))
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id="nonexistent_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == "invalid_request"
assert errors[0].message == "Cannot find file nonexistent_file."
assert errors[0].param == "input_file_id"
assert errors[0].line is None
async def test_validate_input_file_exists_empty_content(self, provider):
"""Test _validate_input when file exists but is empty."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
mock_response.body = b""
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id="empty_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 0
assert len(requests) == 0
async def test_validate_input_file_mixed_valid_invalid_json(self, provider):
"""Test _validate_input when file contains valid and invalid JSON lines."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
# Line 1: valid JSON with proper body args, Line 2: invalid JSON
mock_response.body = b'{"custom_id": "req-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "test-model", "messages": [{"role": "user", "content": "Hello"}]}}\n{invalid json'
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id="mixed_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
# Should have 1 JSON parsing error from line 2, and 1 valid request from line 1
assert len(errors) == 1
assert len(requests) == 1
assert errors[0].code == "invalid_json_line"
assert errors[0].line == 2
assert errors[0].message == "This line is not parseable as valid JSON."
assert requests[0].custom_id == "req-1"
assert requests[0].method == "POST"
assert requests[0].url == "/v1/chat/completions"
assert requests[0].body["model"] == "test-model"
assert requests[0].body["messages"] == [{"role": "user", "content": "Hello"}]
async def test_validate_input_invalid_model(self, provider):
"""Test _validate_input when file contains request with non-existent model."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
mock_response.body = b'{"custom_id": "req-1", "method": "POST", "url": "/v1/chat/completions", "body": {"model": "nonexistent-model", "messages": [{"role": "user", "content": "Hello"}]}}'
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
provider.models_api.get_model = AsyncMock(side_effect=Exception("Model not found"))
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id="invalid_model_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == "model_not_found"
assert errors[0].line == 1
assert errors[0].message == "Model 'nonexistent-model' does not exist or is not supported"
assert errors[0].param == "body.model"
@pytest.mark.parametrize(
"param_name,param_path,error_code,error_message",
[
("custom_id", "custom_id", "missing_required_parameter", "Missing required parameter: custom_id"),
("method", "method", "missing_required_parameter", "Missing required parameter: method"),
("url", "url", "missing_required_parameter", "Missing required parameter: url"),
("body", "body", "missing_required_parameter", "Missing required parameter: body"),
("model", "body.model", "invalid_request", "Model parameter is required"),
("messages", "body.messages", "invalid_request", "Messages parameter is required"),
],
)
async def test_validate_input_missing_parameters(self, provider, param_name, param_path, error_code, error_message):
"""Test _validate_input when file contains request with missing required parameters."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
base_request = {
"custom_id": "req-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {"model": "test-model", "messages": [{"role": "user", "content": "Hello"}]},
}
# Remove the specific parameter being tested
if "." in param_path:
top_level, nested_param = param_path.split(".", 1)
del base_request[top_level][nested_param]
else:
del base_request[param_name]
mock_response.body = json.dumps(base_request).encode()
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id=f"missing_{param_name}_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == error_code
assert errors[0].line == 1
assert errors[0].message == error_message
assert errors[0].param == param_path
async def test_validate_input_url_mismatch(self, provider):
"""Test _validate_input when file contains request with URL that doesn't match batch endpoint."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
mock_response.body = b'{"custom_id": "req-1", "method": "POST", "url": "/v1/embeddings", "body": {"model": "test-model", "messages": [{"role": "user", "content": "Hello"}]}}'
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions", # This doesn't match the URL in the request
input_file_id="url_mismatch_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == "invalid_url"
assert errors[0].line == 1
assert errors[0].message == "URL provided for this request does not match the batch endpoint"
assert errors[0].param == "url"
async def test_validate_input_multiple_errors_per_request(self, provider):
"""Test _validate_input when a single request has multiple validation errors."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
# Request missing custom_id, has invalid URL, and missing model in body
mock_response.body = (
b'{"method": "POST", "url": "/v1/embeddings", "body": {"messages": [{"role": "user", "content": "Hello"}]}}'
)
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions", # Doesn't match /v1/embeddings in request
input_file_id="multiple_errors_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) >= 2 # At least missing custom_id and URL mismatch
assert len(requests) == 0
for error in errors:
assert error.line == 1
error_codes = {error.code for error in errors}
assert "missing_required_parameter" in error_codes # missing custom_id
assert "invalid_url" in error_codes # URL mismatch
async def test_validate_input_invalid_request_format(self, provider):
"""Test _validate_input when file contains non-object JSON (array, string, number)."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
mock_response.body = b'["not", "a", "request", "object"]'
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id="invalid_format_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == "invalid_request"
assert errors[0].line == 1
assert errors[0].message == "Each line must be a JSON dictionary object"
@pytest.mark.parametrize(
"param_name,param_path,invalid_value,error_message",
[
("custom_id", "custom_id", 12345, "Custom_id must be a string"),
("url", "url", 123, "URL must be a string"),
("method", "method", ["POST"], "Method must be a string"),
("body", "body", ["not", "valid"], "Body must be a JSON dictionary object"),
("model", "body.model", 123, "Model must be a string"),
("messages", "body.messages", "invalid messages format", "Messages must be an array"),
],
)
async def test_validate_input_invalid_parameter_types(
self, provider, param_name, param_path, invalid_value, error_message
):
"""Test _validate_input when file contains request with parameters that have invalid types."""
provider.files_api.openai_retrieve_file = AsyncMock()
mock_response = MagicMock()
base_request = {
"custom_id": "req-1",
"method": "POST",
"url": "/v1/chat/completions",
"body": {"model": "test-model", "messages": [{"role": "user", "content": "Hello"}]},
}
# Override the specific parameter with invalid value
if "." in param_path:
top_level, nested_param = param_path.split(".", 1)
base_request[top_level][nested_param] = invalid_value
else:
base_request[param_name] = invalid_value
mock_response.body = json.dumps(base_request).encode()
provider.files_api.openai_retrieve_file_content = AsyncMock(return_value=mock_response)
batch = BatchObject(
id="batch_test",
object="batch",
endpoint="/v1/chat/completions",
input_file_id=f"invalid_{param_name}_type_file",
completion_window="24h",
status="validating",
created_at=1234567890,
)
errors, requests = await provider._validate_input(batch)
assert len(errors) == 1
assert len(requests) == 0
assert errors[0].code == "invalid_request"
assert errors[0].line == 1
assert errors[0].message == error_message
assert errors[0].param == param_path
async def test_max_concurrent_batches(self, provider):
"""Test max_concurrent_batches configuration and concurrency control."""
import asyncio
provider._batch_semaphore = asyncio.Semaphore(2)
provider.process_batches = True # enable because we're testing background processing
active_batches = 0
async def add_and_wait(batch_id: str):
nonlocal active_batches
active_batches += 1
await asyncio.sleep(float("inf"))
# the first thing done in _process_batch is to acquire the semaphore, then call _process_batch_impl,
# so we can replace _process_batch_impl with our mock to control concurrency
provider._process_batch_impl = add_and_wait
for _ in range(3):
await provider.create_batch(
input_file_id="file_id", endpoint="/v1/chat/completions", completion_window="24h"
)
await asyncio.sleep(0.042) # let tasks start
assert active_batches == 2, f"Expected 2 active batches, got {active_batches}"
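
The final test depends on _process_batch acquiring the semaphore before delegating to _process_batch_impl. A standalone sketch of that concurrency-control pattern, with names mirroring the test; the provider's real implementation is assumed, not shown in this diff:

    import asyncio


    class BatchProcessorSketch:
        """Illustrative only: cap the number of batches processed concurrently."""

        def __init__(self, max_concurrent_batches: int = 2):
            self._batch_semaphore = asyncio.Semaphore(max_concurrent_batches)

        async def _process_batch(self, batch_id: str) -> None:
            async with self._batch_semaphore:  # third and later batches queue here
                await self._process_batch_impl(batch_id)

        async def _process_batch_impl(self, batch_id: str) -> None:
            await asyncio.sleep(0)  # stand-in for validation and inference work

With a Semaphore(2), launching three _process_batch tasks leaves exactly two inside _process_batch_impl at any moment, which is what the assertion at the end of the test checks.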