fix: adopt FastAPI directly in llama-stack-api

This commit migrates the Batches API to use FastAPI routers directly in the
API package, removing the need for custom decorator systems and manual router
registration. The API package now defines FastAPI routers using standard
FastAPI route decorators, making it self-sufficient and eliminating dependencies
on the server package.

The router implementation has been moved from llama_stack/core/server/routers/batches.py
to llama_stack_api/batches/routes.py, where it belongs alongside the protocol
and models.

Standard error responses (standard_responses) have been moved from the server
package to llama_stack_api/router_utils.py, ensuring the API package can
define complete routers without server dependencies. FastAPI has been added
as an explicit dependency to the llama-stack-api package, making it an
intentional dependency rather than an implicit one.

Router discovery is now fully automatic. The server discovers routers by
checking for routes modules in each API package and looking for a create_router
function. This eliminates the need for manual registration and makes the system
scalable - new APIs with router modules are automatically discovered and used.

The router registry has been simplified to use automatic discovery instead of
maintaining a manual registry. The build_router function (renamed from
create_router to better reflect its purpose) discovers and combines router
factories with implementations to create the final router instances.

Exposing routers from the API package also benefits the Bring Your Own API
use case.

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-11-20 15:00:11 +01:00
parent 2fe24a6df8
commit 00e7ea6c3b
No known key found for this signature in database
10 changed files with 54 additions and 70 deletions

View file

@ -8,7 +8,7 @@
This module contains the Batches protocol definition.
Pydantic models are defined in llama_stack_api.batches.models.
The router implementation is in llama_stack.core.server.routers.batches.
The FastAPI router is defined in llama_stack_api.batches.routes.
"""
from typing import Literal, Protocol, runtime_checkable

View file

@ -0,0 +1,113 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
"""FastAPI router for the Batches API.
This module defines the FastAPI router for the Batches API using standard
FastAPI route decorators. The router is defined in the API package to keep
all API-related code together.
"""
from collections.abc import Callable
from typing import Annotated
from fastapi import APIRouter, Body, Depends
from llama_stack_api.batches import Batches, BatchObject, ListBatchesResponse
from llama_stack_api.batches.models import CreateBatchRequest
from llama_stack_api.datatypes import Api
from llama_stack_api.router_utils import standard_responses
from llama_stack_api.version import LLAMA_STACK_API_V1
def create_router(impl_getter: Callable[[Api], Batches]) -> APIRouter:
    """Build the FastAPI router that exposes the Batches API.

    Args:
        impl_getter: Callable that resolves the Batches implementation for the
            batches API.

    Returns:
        An APIRouter with all Batches endpoints registered.
    """
    batches_router = APIRouter(
        prefix=f"/{LLAMA_STACK_API_V1}",
        tags=["Batches"],
        responses=standard_responses,
    )

    def _resolve_impl() -> Batches:
        # FastAPI dependency: looks up the live Batches implementation per request.
        return impl_getter(Api.batches)

    @batches_router.post(
        "/batches",
        response_model=BatchObject,
        summary="Create a new batch for processing multiple API requests.",
        description="Create a new batch for processing multiple API requests.",
        responses={
            200: {"description": "The created batch object."},
            409: {"description": "Conflict: The idempotency key was previously used with different parameters."},
        },
    )
    async def create_batch(
        payload: Annotated[CreateBatchRequest, Body(...)],
        batches: Annotated[Batches, Depends(_resolve_impl)],
    ) -> BatchObject:
        # Unpack the validated request model into the protocol's keyword interface.
        return await batches.create_batch(
            input_file_id=payload.input_file_id,
            endpoint=payload.endpoint,
            completion_window=payload.completion_window,
            metadata=payload.metadata,
            idempotency_key=payload.idempotency_key,
        )

    @batches_router.get(
        "/batches/{batch_id}",
        response_model=BatchObject,
        summary="Retrieve information about a specific batch.",
        description="Retrieve information about a specific batch.",
        responses={
            200: {"description": "The batch object."},
        },
    )
    async def retrieve_batch(
        batch_id: str,
        batches: Annotated[Batches, Depends(_resolve_impl)],
    ) -> BatchObject:
        # Thin pass-through: the implementation owns lookup and error handling.
        return await batches.retrieve_batch(batch_id)

    @batches_router.post(
        "/batches/{batch_id}/cancel",
        response_model=BatchObject,
        summary="Cancel a batch that is in progress.",
        description="Cancel a batch that is in progress.",
        responses={
            200: {"description": "The updated batch object."},
        },
    )
    async def cancel_batch(
        batch_id: str,
        batches: Annotated[Batches, Depends(_resolve_impl)],
    ) -> BatchObject:
        # Thin pass-through: the implementation owns state transitions.
        return await batches.cancel_batch(batch_id)

    @batches_router.get(
        "/batches",
        response_model=ListBatchesResponse,
        summary="List all batches for the current user.",
        description="List all batches for the current user.",
        responses={
            200: {"description": "A list of batch objects."},
        },
    )
    async def list_batches(
        batches: Annotated[Batches, Depends(_resolve_impl)],
        after: str | None = None,
        limit: int = 20,
    ) -> ListBatchesResponse:
        # `after` and `limit` arrive as query parameters with these exact names.
        return await batches.list_batches(after=after, limit=limit)

    return batches_router