feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server.

see: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

Motivation

External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts.

This separation allows external providers to:
- Install only the type definitions they need without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were previously blocked by these import conflicts.

Changes

- Created llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests

Precursor PRs to this one:
- #4093
- #3954
- #4064

These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here.

relates to #3237

## Test Plan

Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained.

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-04 02:03:44 +00:00 · 2025-11-13 14:51:17 -05:00 · 2025-11-13 14:51:17 -05:00 · 840ad75fe9
commit 840ad75fe9
parent ceb716b9a0
358 changed files with 2337 additions and 1424 deletions
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse

 import httpx
 import jwt
+from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field

-from llama_stack.apis.common.errors import TokenValidationError
 from llama_stack.core.datatypes import (
    AuthenticationConfig,
    CustomAuthConfig,
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,11 +10,10 @@ from collections.abc import Callable
 from typing import Any

 from aiohttp import hdrs
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route

-from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.core.resolver import api_protocol_map
-from llama_stack.schema_utils import WebMethod

 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,11 +28,10 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError

-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
    AuthenticationRequiredError,
@@ -58,7 +57,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
-from llama_stack.providers.datatypes import Api

 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware