feat: Implement FastAPI router system (#4191)

# What does this PR do?

This commit introduces a new FastAPI router-based system for defining
API endpoints, enabling a migration path away from the legacy @webmethod
decorator system. The implementation includes router infrastructure,
migration of the Batches API as the first example, and updates to
server, OpenAPI generation, and inspection systems to support both
routing approaches.

The router infrastructure consists of a router registry system that
allows APIs to register FastAPI router factories, which are then
automatically discovered and included in the server application.
Standard error responses are centralized in router_utils to ensure
consistent OpenAPI specification generation with proper $ref references
to component responses.
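
To make the mechanism concrete, here is a minimal sketch of such a registry; the names `register_router` and `get_router_factories` are illustrative, not necessarily the exact API added in this PR:

```python
# Hypothetical sketch of the router registry: an API registers a factory that
# builds its APIRouter once the implementation instance is available.
from collections.abc import Callable
from typing import Any

from fastapi import APIRouter

# api name -> factory that takes the API implementation and returns a router
_ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {}


def register_router(api_name: str, factory: Callable[[Any], APIRouter]) -> None:
    """Make a router factory discoverable by the server at startup."""
    _ROUTER_FACTORIES[api_name] = factory


def get_router_factories() -> dict[str, Callable[[Any], APIRouter]]:
    """Return every registered factory so the server can include the routers."""
    return dict(_ROUTER_FACTORIES)
```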

The Batches API has been migrated to demonstrate the new pattern. The
protocol definition and models remain in llama_stack_api/batches, while
the FastAPI router implementation lives in
llama_stack/core/server/routers/batches. This follows the established
separation between API contracts, defined in llama_stack_api, and
server routing logic, which lives in llama_stack/core/server.
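
As a rough illustration of that split (the types below are stand-ins, not the real llama_stack_api models), the server-side router factory only adapts HTTP requests to the protocol:

```python
# Illustrative sketch only: the router wires HTTP paths to the Batches protocol
# implementation; the protocol and its request/response models stay in llama_stack_api.
from typing import Protocol

from fastapi import APIRouter
from pydantic import BaseModel


class BatchObject(BaseModel):  # stand-in for the real llama_stack_api model
    id: str
    status: str


class RetrieveBatchRequest(BaseModel):  # stand-in for llama_stack_api.batches.models
    batch_id: str


class Batches(Protocol):  # stand-in for the protocol defined in llama_stack_api/batches
    async def retrieve_batch(self, request: RetrieveBatchRequest) -> BatchObject: ...


def create_batches_router(impl: Batches) -> APIRouter:
    router = APIRouter(prefix="/v1/batches", tags=["Batches"])

    @router.get("/{batch_id}", response_model=BatchObject)
    async def retrieve_batch(batch_id: str) -> BatchObject:
        # adapt the path parameter to the protocol's request model
        return await impl.retrieve_batch(RetrieveBatchRequest(batch_id=batch_id))

    return router
```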

The server now checks for registered routers before falling back to the
legacy webmethod-based route discovery, ensuring backward compatibility
during the migration period. The OpenAPI generator has been updated to
handle both router-based and webmethod-based routes, correctly
extracting metadata from FastAPI route decorators and Pydantic Field
descriptions. The inspect endpoint now includes routes from both
systems, with proper filtering for deprecated routes and API levels.
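
A minimal sketch of that selection logic, assuming the registry sketched above (function names here are placeholders, not the exact server code):

```python
# Sketch: prefer a registered FastAPI router, otherwise fall back to the legacy
# @webmethod-based route discovery. All names are illustrative.
from fastapi import FastAPI


def mount_api(app: FastAPI, api_name: str, impl: object) -> None:
    factories = get_router_factories()  # registry sketched earlier
    if api_name in factories:
        # new path: build the router from its factory and include it in the app
        app.include_router(factories[api_name](impl))
    else:
        # legacy path: discover @webmethod-decorated endpoints on the protocol
        register_webmethod_routes(app, api_name, impl)  # hypothetical legacy helper
```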

Response descriptions are now explicitly defined in router decorators,
ensuring the generated OpenAPI specification matches the previous
format. Error responses use $ref references to component responses
(BadRequest400, TooManyRequests429, etc.) as required by the
specification. This is neat and will allow us to remove a lot of
boilerplate code from our generator once the migration is done.
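
For example, a route can declare its success description and point its error responses at the shared components, roughly like this (the `standard_error_responses` mapping is an assumed helper in router_utils, not necessarily its real name):

```python
# Illustrative only: explicit response descriptions plus shared error responses,
# so the generated OpenAPI spec uses $ref links to component responses.
from fastapi import APIRouter

# assumed centralized mapping, e.g. defined once in router_utils
standard_error_responses = {
    400: {"$ref": "#/components/responses/BadRequest400"},
    429: {"$ref": "#/components/responses/TooManyRequests429"},
}

router = APIRouter(prefix="/v1/batches", tags=["Batches"])


@router.post(
    "",
    response_description="The created batch object.",
    responses=standard_error_responses,
)
async def create_batch() -> dict:
    # body omitted; see the actual batches router in this PR
    ...
```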

This implementation provides a foundation for incrementally migrating
other APIs to the router system while maintaining full backward
compatibility with existing webmethod-based APIs.

Closes: https://github.com/llamastack/llama-stack/issues/4188

## Test Plan

CI; the server should start and the same routes should be visible:

```
curl http://localhost:8321/v1/inspect/routes | jq '.data[] | select(.route | contains("batches"))'
```

Also:

```
 uv run pytest tests/integration/batches/ -vv --stack-config=http://localhost:8321
================================================== test session starts ==================================================
platform darwin -- Python 3.12.8, pytest-8.4.2, pluggy-1.6.0 -- /Users/leseb/Documents/AI/llama-stack/.venv/bin/python3
cachedir: .pytest_cache
metadata: {'Python': '3.12.8', 'Platform': 'macOS-26.0.1-arm64-arm-64bit', 'Packages': {'pytest': '8.4.2', 'pluggy': '1.6.0'}, 'Plugins': {'anyio': '4.9.0', 'html': '4.1.1', 'socket': '0.7.0', 'asyncio': '1.1.0', 'json-report': '1.5.0', 'timeout': '2.4.0', 'metadata': '3.1.1', 'cov': '6.2.1', 'nbval': '0.11.0'}}
rootdir: /Users/leseb/Documents/AI/llama-stack
configfile: pyproject.toml
plugins: anyio-4.9.0, html-4.1.1, socket-0.7.0, asyncio-1.1.0, json-report-1.5.0, timeout-2.4.0, metadata-3.1.1, cov-6.2.1, nbval-0.11.0
asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function
collected 24 items                                                                                                      

tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_creation_and_retrieval[None] SKIPPED [  4%]
tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_listing[None] SKIPPED               [  8%]
tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_immediate_cancellation[None] SKIPPED [ 12%]
tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_chat_completions[None] SKIPPED  [ 16%]
tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_completions[None] SKIPPED       [ 20%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_invalid_endpoint[None] SKIPPED [ 25%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_cancel_completed[None] SKIPPED [ 29%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_missing_required_fields[None] SKIPPED [ 33%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_invalid_completion_window[None] SKIPPED [ 37%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_streaming_not_supported[None] SKIPPED [ 41%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_mixed_streaming_requests[None] SKIPPED [ 45%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_endpoint_mismatch[None] SKIPPED [ 50%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_missing_required_body_fields[None] SKIPPED [ 54%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_invalid_metadata_types[None] SKIPPED [ 58%]
tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_e2e_embeddings[None] SKIPPED        [ 62%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_nonexistent_file_id PASSED [ 66%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_malformed_jsonl PASSED     [ 70%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_file_malformed_batch_file[empty] XFAIL [ 75%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_file_malformed_batch_file[malformed] XFAIL [ 79%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_retrieve_nonexistent PASSED [ 83%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_cancel_nonexistent PASSED  [ 87%]
tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_error_handling_invalid_model PASSED [ 91%]
tests/integration/batches/test_batches_idempotency.py::TestBatchesIdempotencyIntegration::test_idempotent_batch_creation_successful PASSED [ 95%]
tests/integration/batches/test_batches_idempotency.py::TestBatchesIdempotencyIntegration::test_idempotency_conflict_with_different_params PASSED [100%]

================================================= slowest 10 durations ==================================================
1.01s call     tests/integration/batches/test_batches_idempotency.py::TestBatchesIdempotencyIntegration::test_idempotent_batch_creation_successful
0.21s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_nonexistent_file_id
0.17s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_malformed_jsonl
0.12s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_error_handling_invalid_model
0.05s setup    tests/integration/batches/test_batches.py::TestBatchesIntegration::test_batch_creation_and_retrieval[None]
0.02s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_file_malformed_batch_file[empty]
0.01s call     tests/integration/batches/test_batches_idempotency.py::TestBatchesIdempotencyIntegration::test_idempotency_conflict_with_different_params
0.01s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_file_malformed_batch_file[malformed]
0.01s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_retrieve_nonexistent
0.00s call     tests/integration/batches/test_batches_errors.py::TestBatchesErrorHandling::test_batch_cancel_nonexistent
======================================= 7 passed, 15 skipped, 2 xfailed in 1.78s ========================================
```

---------

Signed-off-by: Sébastien Han <seb@redhat.com>

Excerpt of the changes to the reference Batches provider unit tests:

```diff
@@ -58,8 +58,15 @@ import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
+from pydantic import ValidationError
 
 from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError
+from llama_stack_api.batches.models import (
+    CancelBatchRequest,
+    CreateBatchRequest,
+    ListBatchesRequest,
+    RetrieveBatchRequest,
+)
 
 
 class TestReferenceBatchesImpl:
@@ -169,7 +176,7 @@ class TestReferenceBatchesImpl:
     async def test_create_and_retrieve_batch_success(self, provider, sample_batch_data):
         """Test successful batch creation and retrieval."""
 
-        created_batch = await provider.create_batch(**sample_batch_data)
+        created_batch = await provider.create_batch(CreateBatchRequest(**sample_batch_data))
 
         self._validate_batch_type(created_batch, expected_metadata=sample_batch_data["metadata"])
@@ -184,7 +191,7 @@ class TestReferenceBatchesImpl:
         assert isinstance(created_batch.created_at, int)
         assert created_batch.created_at > 0
 
-        retrieved_batch = await provider.retrieve_batch(created_batch.id)
+        retrieved_batch = await provider.retrieve_batch(RetrieveBatchRequest(batch_id=created_batch.id))
 
         self._validate_batch_type(retrieved_batch, expected_metadata=sample_batch_data["metadata"])
@@ -197,17 +204,15 @@ class TestReferenceBatchesImpl:
     async def test_create_batch_without_metadata(self, provider):
         """Test batch creation without optional metadata."""
         batch = await provider.create_batch(
-            input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="24h"
+            CreateBatchRequest(input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="24h")
         )
 
         assert batch.metadata is None
 
     async def test_create_batch_completion_window(self, provider):
         """Test batch creation with invalid completion window."""
-        with pytest.raises(ValueError, match="Invalid completion_window"):
-            await provider.create_batch(
-                input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="now"
-            )
+        with pytest.raises(ValidationError, match="completion_window"):
+            CreateBatchRequest(input_file_id="file_123", endpoint="/v1/chat/completions", completion_window="now")
 
     @pytest.mark.parametrize(
         "endpoint",
@@ -219,37 +224,43 @@
     async def test_create_batch_invalid_endpoints(self, provider, endpoint):
         """Test batch creation with various invalid endpoints."""
         with pytest.raises(ValueError, match="Invalid endpoint"):
-            await provider.create_batch(input_file_id="file_123", endpoint=endpoint, completion_window="24h")
+            await provider.create_batch(
+                CreateBatchRequest(input_file_id="file_123", endpoint=endpoint, completion_window="24h")
+            )
 
     async def test_create_batch_invalid_metadata(self, provider):
         """Test that batch creation fails with invalid metadata."""
         with pytest.raises(ValueError, match="should be a valid string"):
             await provider.create_batch(
-                input_file_id="file_123",
-                endpoint="/v1/chat/completions",
-                completion_window="24h",
-                metadata={123: "invalid_key"},  # Non-string key
+                CreateBatchRequest(
+                    input_file_id="file_123",
+                    endpoint="/v1/chat/completions",
+                    completion_window="24h",
+                    metadata={123: "invalid_key"},  # Non-string key
+                )
             )
 
         with pytest.raises(ValueError, match="should be a valid string"):
             await provider.create_batch(
-                input_file_id="file_123",
-                endpoint="/v1/chat/completions",
-                completion_window="24h",
-                metadata={"valid_key": 456},  # Non-string value
+                CreateBatchRequest(
+                    input_file_id="file_123",
+                    endpoint="/v1/chat/completions",
+                    completion_window="24h",
+                    metadata={"valid_key": 456},  # Non-string value
+                )
             )
 
     async def test_retrieve_batch_not_found(self, provider):
         """Test error when retrieving non-existent batch."""
         with pytest.raises(ResourceNotFoundError, match=r"Batch 'nonexistent_batch' not found"):
-            await provider.retrieve_batch("nonexistent_batch")
+            await provider.retrieve_batch(RetrieveBatchRequest(batch_id="nonexistent_batch"))
 
     async def test_cancel_batch_success(self, provider, sample_batch_data):
         """Test successful batch cancellation."""
-        created_batch = await provider.create_batch(**sample_batch_data)
+        created_batch = await provider.create_batch(CreateBatchRequest(**sample_batch_data))
         assert created_batch.status == "validating"
 
-        cancelled_batch = await provider.cancel_batch(created_batch.id)
+        cancelled_batch = await provider.cancel_batch(CancelBatchRequest(batch_id=created_batch.id))
         assert cancelled_batch.id == created_batch.id
         assert cancelled_batch.status in ["cancelling", "cancelled"]
@@ -260,22 +271,22 @@ class TestReferenceBatchesImpl:
     async def test_cancel_batch_invalid_statuses(self, provider, sample_batch_data, status):
         """Test error when cancelling batch in final states."""
         provider.process_batches = False
-        created_batch = await provider.create_batch(**sample_batch_data)
+        created_batch = await provider.create_batch(CreateBatchRequest(**sample_batch_data))
 
         # directly update status in kvstore
         await provider._update_batch(created_batch.id, status=status)
 
         with pytest.raises(ConflictError, match=f"Cannot cancel batch '{created_batch.id}' with status '{status}'"):
-            await provider.cancel_batch(created_batch.id)
+            await provider.cancel_batch(CancelBatchRequest(batch_id=created_batch.id))
 
     async def test_cancel_batch_not_found(self, provider):
         """Test error when cancelling non-existent batch."""
         with pytest.raises(ResourceNotFoundError, match=r"Batch 'nonexistent_batch' not found"):
-            await provider.cancel_batch("nonexistent_batch")
+            await provider.cancel_batch(CancelBatchRequest(batch_id="nonexistent_batch"))
 
     async def test_list_batches_empty(self, provider):
         """Test listing batches when none exist."""
-        response = await provider.list_batches()
+        response = await provider.list_batches(ListBatchesRequest())
 
         assert response.object == "list"
         assert response.data == []
@@ -285,9 +296,9 @@ class TestReferenceBatchesImpl:
     async def test_list_batches_single_batch(self, provider, sample_batch_data):
         """Test listing batches with single batch."""
-        created_batch = await provider.create_batch(**sample_batch_data)
+        created_batch = await provider.create_batch(CreateBatchRequest(**sample_batch_data))
 
-        response = await provider.list_batches()
+        response = await provider.list_batches(ListBatchesRequest())
 
         assert len(response.data) == 1
         self._validate_batch_type(response.data[0], expected_metadata=sample_batch_data["metadata"])
@@ -300,12 +311,12 @@ class TestReferenceBatchesImpl:
         """Test listing multiple batches."""
         batches = [
             await provider.create_batch(
-                input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
+                CreateBatchRequest(input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h")
             )
             for i in range(3)
         ]
 
-        response = await provider.list_batches()
+        response = await provider.list_batches(ListBatchesRequest())
 
         assert len(response.data) == 3
@@ -321,12 +332,12 @@ class TestReferenceBatchesImpl:
         """Test listing batches with limit parameter."""
         batches = [
             await provider.create_batch(
-                input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
+                CreateBatchRequest(input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h")
            )
             for i in range(3)
         ]
 
-        response = await provider.list_batches(limit=2)
+        response = await provider.list_batches(ListBatchesRequest(limit=2))
 
         assert len(response.data) == 2
         assert response.has_more is True
@@ -340,36 +351,36 @@ class TestReferenceBatchesImpl:
         """Test listing batches with pagination using 'after' parameter."""
         for i in range(3):
             await provider.create_batch(
-                input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h"
+                CreateBatchRequest(input_file_id=f"file_{i}", endpoint="/v1/chat/completions", completion_window="24h")
             )
 
         # Get first page
-        first_page = await provider.list_batches(limit=1)
+        first_page = await provider.list_batches(ListBatchesRequest(limit=1))
         assert len(first_page.data) == 1
         assert first_page.has_more is True
 
         # Get second page using 'after'
-        second_page = await provider.list_batches(limit=1, after=first_page.data[0].id)
+        second_page = await provider.list_batches(ListBatchesRequest(limit=1, after=first_page.data[0].id))
         assert len(second_page.data) == 1
         assert second_page.data[0].id != first_page.data[0].id
 
         # Verify we got the next batch in order
-        all_batches = await provider.list_batches()
+        all_batches = await provider.list_batches(ListBatchesRequest())
         expected_second_batch_id = all_batches.data[1].id
         assert second_page.data[0].id == expected_second_batch_id
 
     async def test_list_batches_invalid_after(self, provider, sample_batch_data):
         """Test listing batches with invalid 'after' parameter."""
-        await provider.create_batch(**sample_batch_data)
+        await provider.create_batch(CreateBatchRequest(**sample_batch_data))
 
-        response = await provider.list_batches(after="nonexistent_batch")
+        response = await provider.list_batches(ListBatchesRequest(after="nonexistent_batch"))
 
         # Should return all batches (no filtering when 'after' batch not found)
         assert len(response.data) == 1
 
     async def test_kvstore_persistence(self, provider, sample_batch_data):
         """Test that batches are properly persisted in kvstore."""
-        batch = await provider.create_batch(**sample_batch_data)
+        batch = await provider.create_batch(CreateBatchRequest(**sample_batch_data))
 
         stored_data = await provider.kvstore.get(f"batch:{batch.id}")
         assert stored_data is not None
@@ -757,7 +768,7 @@ class TestReferenceBatchesImpl:
         for _ in range(3):
             await provider.create_batch(
-                input_file_id="file_id", endpoint="/v1/chat/completions", completion_window="24h"
+                CreateBatchRequest(input_file_id="file_id", endpoint="/v1/chat/completions", completion_window="24h")
             )
 
         await asyncio.sleep(0.042)  # let tasks start
@@ -767,8 +778,10 @@ class TestReferenceBatchesImpl:
     async def test_create_batch_embeddings_endpoint(self, provider):
         """Test that batch creation succeeds with embeddings endpoint."""
         batch = await provider.create_batch(
-            input_file_id="file_123",
-            endpoint="/v1/embeddings",
-            completion_window="24h",
+            CreateBatchRequest(
+                input_file_id="file_123",
+                endpoint="/v1/embeddings",
+                completion_window="24h",
+            )
         )
 
         assert batch.endpoint == "/v1/embeddings"
```