feat: split API and provider specs into separate llama-stack-api pkg

Extract API definitions, models, and provider specifications into a
standalone llama-stack-api package that can be published to PyPI
independently of the main llama-stack server.

Motivation

External providers currently import from llama-stack itself, which pulls
in the full server package, can override the installed llama-stack
version, and causes dependency conflicts. This separation allows
external providers to:

- Install only the type definitions they need without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.

Changes

- Created llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.* (see the example after this list)
- Preserved git history using git mv for moved files
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
- Rebased on top of upstream src/ layout changes
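
For example, a typical provider-facing import changes as follows (both
lines taken verbatim from the diffs below):

    # Before: resolves against the full llama-stack server package
    from llama_stack.providers.datatypes import Api, ProviderSpec

    # After: resolves against the standalone llama-stack-api package
    from llama_stack_api.providers.datatypes import Api, ProviderSpec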

Testing

The package builds successfully and can be imported independently of
the main llama-stack server. All pre-commit hooks pass, with the
expected exclusions maintained.
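
A minimal import smoke test is sketched below; the module paths are the
ones introduced in the diffs that follow, and it assumes llama-stack-api
is installed while the llama-stack server package is not:

    # Sketch: verify the standalone package imports without the server.
    import llama_stack_api
    from llama_stack_api.providers.datatypes import Api, ProviderSpec
    from llama_stack_api.schema_utils import WebMethod

    print("llama-stack-api imports cleanly without the llama-stack server")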

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Author: Charlie Doern
Date: 2025-10-30 12:25:23 -04:00
commit 85d407c2a0 (parent e5a55f3677)
359 files changed, 1259 insertions(+), 980 deletions(-)

View file

@@ -7,6 +7,7 @@
 import importlib.resources
 import sys
+from llama_stack_api.providers.datatypes import Api
 from pydantic import BaseModel
 from termcolor import cprint
@@ -17,7 +18,6 @@ from llama_stack.core.utils.exec import run_command
 from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api
 log = get_logger(name=__name__, category="core")

View file

@@ -12,11 +12,10 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 import httpx
+from llama_stack_api.providers.datatypes import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
-from llama_stack.providers.datatypes import RemoteProviderConfig
 _CLIENT_CLASSES = {}

View file

@@ -6,6 +6,8 @@
 import textwrap
 from typing import Any
+from llama_stack_api.providers.datatypes import Api, ProviderSpec
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     DistributionSpec,
@@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, ProviderSpec
 logger = get_logger(name=__name__, category="core")

View file

@@ -8,9 +8,7 @@ import secrets
 import time
 from typing import Any, Literal
-from pydantic import BaseModel, TypeAdapter
-from llama_stack.apis.conversations.conversations import (
+from llama_stack_api.apis.conversations.conversations import (
     Conversation,
     ConversationDeletedResource,
     ConversationItem,
@@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import (
     Conversations,
     Metadata,
 )
+from pydantic import BaseModel, TypeAdapter
 from llama_stack.core.datatypes import AccessRule, StackRunConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType

View file

@@ -9,22 +9,23 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
+from llama_stack_api.apis.benchmarks import Benchmark, BenchmarkInput
+from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.apis.datasets import Dataset, DatasetInput
+from llama_stack_api.apis.eval import Eval
+from llama_stack_api.apis.inference import Inference
+from llama_stack_api.apis.models import Model, ModelInput
+from llama_stack_api.apis.resource import Resource
+from llama_stack_api.apis.safety import Safety
+from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.apis.scoring_functions import ScoringFn, ScoringFnInput
+from llama_stack_api.apis.shields import Shield, ShieldInput
+from llama_stack_api.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
+from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.apis.vector_stores import VectorStore, VectorStoreInput
+from llama_stack_api.providers.datatypes import Api, ProviderSpec
 from pydantic import BaseModel, Field, field_validator, model_validator
-from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset, DatasetInput
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.models import Model, ModelInput
-from llama_stack.apis.resource import Resource
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput
-from llama_stack.apis.shields import Shield, ShieldInput
-from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput
 from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.core.storage.datatypes import (
     KVStoreReference,
@@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.log import LoggingConfig
-from llama_stack.providers.datatypes import Api, ProviderSpec
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2

View file

@@ -10,17 +10,17 @@ import os
 from typing import Any
 import yaml
-from pydantic import BaseModel
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.external import load_external_apis
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api.providers.datatypes import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
     RemoteProviderSpec,
 )
+from pydantic import BaseModel
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.external import load_external_apis
+from llama_stack.log import get_logger
 logger = get_logger(name=__name__, category="core")

View file

@@ -6,8 +6,8 @@
 import yaml
+from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger

View file

@@ -6,19 +6,19 @@
 from importlib.metadata import version
-from pydantic import BaseModel
-from llama_stack.apis.inspect import (
+from llama_stack_api.apis.inspect import (
     HealthInfo,
     Inspect,
     ListRoutesResponse,
     RouteInfo,
     VersionInfo,
 )
+from llama_stack_api.providers.datatypes import HealthStatus
+from pydantic import BaseModel
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.core.external import load_external_apis
 from llama_stack.core.server.routes import get_all_api_routes
-from llama_stack.providers.datatypes import HealthStatus
 class DistributionInspectConfig(BaseModel):

View file

@@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
+from llama_stack_api.strong_typing.inspection import is_unwrapped_body_param
 try:
     from llama_stack_client import (
@@ -57,7 +58,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.core.utils.exec import in_notebook
 from llama_stack.log import get_logger, setup_logging
-from llama_stack.strong_typing.inspection import is_unwrapped_body_param
 logger = get_logger(name=__name__, category="core")

View file

@@ -7,9 +7,9 @@
 import json
 from typing import Any
+from llama_stack_api.apis.prompts import ListPromptsResponse, Prompt, Prompts
 from pydantic import BaseModel
-from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl

View file

@@ -7,11 +7,11 @@
 import asyncio
 from typing import Any
+from llama_stack_api.apis.providers import ListProvidersResponse, ProviderInfo, Providers
+from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus
 from pydantic import BaseModel
-from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus
 from .datatypes import StackRunConfig
 from .utils.config import redact_sensitive_fields

View file

@@ -8,29 +8,42 @@ import importlib.metadata
 import inspect
 from typing import Any
-from llama_stack.apis.agents import Agents
-from llama_stack.apis.batches import Batches
-from llama_stack.apis.benchmarks import Benchmarks
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.datatypes import ExternalApiSpec
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InferenceProvider
-from llama_stack.apis.inspect import Inspect
-from llama_stack.apis.models import Models
-from llama_stack.apis.post_training import PostTraining
-from llama_stack.apis.prompts import Prompts
-from llama_stack.apis.providers import Providers as ProvidersAPI
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFunctions
-from llama_stack.apis.shields import Shields
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.apis.agents import Agents
+from llama_stack_api.apis.batches import Batches
+from llama_stack_api.apis.benchmarks import Benchmarks
+from llama_stack_api.apis.conversations import Conversations
+from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.apis.datasets import Datasets
+from llama_stack_api.apis.datatypes import ExternalApiSpec
+from llama_stack_api.apis.eval import Eval
+from llama_stack_api.apis.files import Files
+from llama_stack_api.apis.inference import Inference, InferenceProvider
+from llama_stack_api.apis.inspect import Inspect
+from llama_stack_api.apis.models import Models
+from llama_stack_api.apis.post_training import PostTraining
+from llama_stack_api.apis.prompts import Prompts
+from llama_stack_api.apis.providers import Providers as ProvidersAPI
+from llama_stack_api.apis.safety import Safety
+from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.apis.scoring_functions import ScoringFunctions
+from llama_stack_api.apis.shields import Shields
+from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
+from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.apis.vector_stores import VectorStore
+from llama_stack_api.apis.version import LLAMA_STACK_API_V1ALPHA
+from llama_stack_api.providers.datatypes import (
+    Api,
+    BenchmarksProtocolPrivate,
+    DatasetsProtocolPrivate,
+    ModelsProtocolPrivate,
+    ProviderSpec,
+    RemoteProviderConfig,
+    RemoteProviderSpec,
+    ScoringFunctionsProtocolPrivate,
+    ShieldsProtocolPrivate,
+    ToolGroupsProtocolPrivate,
+)
 from llama_stack.core.client import get_client_impl
 from llama_stack.core.datatypes import (
     AccessRule,
@@ -44,18 +57,6 @@ from llama_stack.core.external import load_external_apis
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    Api,
-    BenchmarksProtocolPrivate,
-    DatasetsProtocolPrivate,
-    ModelsProtocolPrivate,
-    ProviderSpec,
-    RemoteProviderConfig,
-    RemoteProviderSpec,
-    ScoringFunctionsProtocolPrivate,
-    ShieldsProtocolPrivate,
-    ToolGroupsProtocolPrivate,
-)
 logger = get_logger(name=__name__, category="core")

View file

@@ -6,13 +6,14 @@
 from typing import Any
+from llama_stack_api.providers.datatypes import Api, RoutingTable
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
 )
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
-from llama_stack.providers.datatypes import Api, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore

View file

@@ -6,11 +6,12 @@
 from typing import Any
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import DatasetPurpose, DataSource
+from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.apis.datasets import DatasetPurpose, DataSource
+from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")

View file

@@ -6,15 +6,16 @@
 from typing import Any
-from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
-from llama_stack.apis.scoring import (
+from llama_stack_api.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
+from llama_stack_api.apis.scoring import (
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
     ScoringFnParams,
 )
+from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")

View file

@@ -11,12 +11,8 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Body
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack.apis.inference import (
+from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.apis.inference import (
     Inference,
     ListOpenAIChatCompletionResponse,
     OpenAIAssistantMessageParam,
@@ -36,17 +32,21 @@ from llama_stack.apis.inference import (
     Order,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
+from llama_stack_api.apis.inference.inference import (
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartTextParam,
 )
-from llama_stack.apis.models import ModelType
+from llama_stack_api.apis.models import ModelType
+from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
 from llama_stack.core.telemetry.telemetry import MetricEvent
 from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 logger = get_logger(name=__name__, category="core::routers")

View file

@@ -6,13 +6,14 @@
 from typing import Any
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import RunShieldResponse, Safety
-from llama_stack.apis.safety.safety import ModerationObject
-from llama_stack.apis.shields import Shield
+from llama_stack_api.apis.inference import OpenAIMessageParam
+from llama_stack_api.apis.safety import RunShieldResponse, Safety
+from llama_stack_api.apis.safety.safety import ModerationObject
+from llama_stack_api.apis.shields import Shield
+from llama_stack_api.providers.datatypes import RoutingTable
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")

View file

@@ -6,13 +6,14 @@
 from typing import Any
-from llama_stack.apis.common.content_types import (
+from llama_stack_api.apis.common.content_types import (
     URL,
 )
-from llama_stack.apis.tools import (
+from llama_stack_api.apis.tools import (
     ListToolDefsResponse,
     ToolRuntime,
 )
 from llama_stack.log import get_logger
 from ..routing_tables.toolgroups import ToolGroupsRoutingTable

View file

@@ -9,10 +9,9 @@ import uuid
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.models import ModelType
-from llama_stack.apis.vector_io import (
+from llama_stack_api.apis.common.content_types import InterleavedContent
+from llama_stack_api.apis.models import ModelType
+from llama_stack_api.apis.vector_io import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
@@ -33,9 +32,10 @@ from llama_stack.apis.vector_io import (
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
+from llama_stack_api.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 logger = get_logger(name=__name__, category="core::routers")

View file

@@ -6,7 +6,8 @@
 from typing import Any
-from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
+from llama_stack_api.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )

View file

@@ -6,9 +6,11 @@
 from typing import Any
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.models import Model
-from llama_stack.apis.resource import ResourceType
+from llama_stack_api.apis.common.errors import ModelNotFoundError
+from llama_stack_api.apis.models import Model
+from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.providers.datatypes import Api, RoutingTable
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
 from llama_stack.core.datatypes import (
@@ -21,7 +23,6 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api, RoutingTable
 logger = get_logger(name=__name__, category="core::routing_tables")

View file

@@ -7,8 +7,8 @@
 import uuid
 from typing import Any
-from llama_stack.apis.common.errors import DatasetNotFoundError
-from llama_stack.apis.datasets import (
+from llama_stack_api.apis.common.errors import DatasetNotFoundError
+from llama_stack_api.apis.datasets import (
     Dataset,
     DatasetPurpose,
     Datasets,
@@ -18,7 +18,8 @@ from llama_stack.apis.datasets import (
     RowsDataSource,
     URIDataSource,
 )
-from llama_stack.apis.resource import ResourceType
+from llama_stack_api.apis.resource import ResourceType
 from llama_stack.core.datatypes import (
     DatasetWithOwner,
 )

View file

@@ -7,8 +7,16 @@
 import time
 from typing import Any
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel
+from llama_stack_api.apis.common.errors import ModelNotFoundError
+from llama_stack_api.apis.models import (
+    ListModelsResponse,
+    Model,
+    Models,
+    ModelType,
+    OpenAIListModelsResponse,
+    OpenAIModel,
+)
 from llama_stack.core.datatypes import (
     ModelWithOwner,
     RegistryEntrySource,

View file

@@ -4,14 +4,15 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api.apis.common.type_system import ParamType
+from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.apis.scoring_functions import (
     ListScoringFunctionsResponse,
     ScoringFn,
     ScoringFnParams,
     ScoringFunctions,
 )
 from llama_stack.core.datatypes import (
     ScoringFnWithOwner,
 )

View file

@@ -6,8 +6,9 @@
 from typing import Any
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields
+from llama_stack_api.apis.resource import ResourceType
+from llama_stack_api.apis.shields import ListShieldsResponse, Shield, Shields
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )

View file

@@ -6,9 +6,10 @@
 from typing import Any
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.errors import ToolGroupNotFoundError
-from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
+from llama_stack_api.apis.common.content_types import URL
+from llama_stack_api.apis.common.errors import ToolGroupNotFoundError
+from llama_stack_api.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups
 from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
 from llama_stack.log import get_logger

View file

@@ -6,12 +6,12 @@
 from typing import Any
-from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack.apis.models import ModelType
-from llama_stack.apis.resource import ResourceType
+from llama_stack_api.apis.common.errors import ModelNotFoundError, ModelTypeError
+from llama_stack_api.apis.models import ModelType
+from llama_stack_api.apis.resource import ResourceType
 # Removed VectorStores import to avoid exposing public API
-from llama_stack.apis.vector_io.vector_io import (
+from llama_stack_api.apis.vector_io.vector_io import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
@@ -22,6 +22,7 @@ from llama_stack.apis.vector_io.vector_io import (
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
 from llama_stack.core.datatypes import (
     VectorStoreWithOwner,
 )

View file

@@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse
 import httpx
 import jwt
+from llama_stack_api.apis.common.errors import TokenValidationError
 from pydantic import BaseModel, Field
-from llama_stack.apis.common.errors import TokenValidationError
 from llama_stack.core.datatypes import (
     AuthenticationConfig,
     CustomAuthConfig,

View file

@@ -10,11 +10,11 @@ from collections.abc import Callable
 from typing import Any
 from aiohttp import hdrs
+from llama_stack_api.apis.datatypes import Api, ExternalApiSpec
+from llama_stack_api.schema_utils import WebMethod
 from starlette.routing import Route
-from llama_stack.apis.datatypes import Api, ExternalApiSpec
 from llama_stack.core.resolver import api_protocol_map
-from llama_stack.schema_utils import WebMethod
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]

View file

@@ -28,11 +28,12 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
+from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api.apis.common.responses import PaginatedResponse
+from llama_stack_api.providers.datatypes import Api
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.access_control.access_control import AccessDeniedError
 from llama_stack.core.datatypes import (
     AuthenticationRequiredError,
@@ -58,7 +59,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
-from llama_stack.providers.datatypes import Api
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware

View file

@@ -12,27 +12,28 @@ import tempfile
 from typing import Any
 import yaml
+from llama_stack_api.apis.agents import Agents
+from llama_stack_api.apis.batches import Batches
+from llama_stack_api.apis.benchmarks import Benchmarks
+from llama_stack_api.apis.conversations import Conversations
+from llama_stack_api.apis.datasetio import DatasetIO
+from llama_stack_api.apis.datasets import Datasets
+from llama_stack_api.apis.eval import Eval
+from llama_stack_api.apis.files import Files
+from llama_stack_api.apis.inference import Inference
+from llama_stack_api.apis.inspect import Inspect
+from llama_stack_api.apis.models import Models
+from llama_stack_api.apis.post_training import PostTraining
+from llama_stack_api.apis.prompts import Prompts
+from llama_stack_api.apis.providers import Providers
+from llama_stack_api.apis.safety import Safety
+from llama_stack_api.apis.scoring import Scoring
+from llama_stack_api.apis.scoring_functions import ScoringFunctions
+from llama_stack_api.apis.shields import Shields
+from llama_stack_api.apis.tools import ToolGroups, ToolRuntime
+from llama_stack_api.apis.vector_io import VectorIO
+from llama_stack_api.providers.datatypes import Api
-from llama_stack.apis.agents import Agents
-from llama_stack.apis.batches import Batches
-from llama_stack.apis.benchmarks import Benchmarks
-from llama_stack.apis.conversations import Conversations
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.eval import Eval
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.inspect import Inspect
-from llama_stack.apis.models import Models
-from llama_stack.apis.post_training import PostTraining
-from llama_stack.apis.prompts import Prompts
-from llama_stack.apis.providers import Providers
-from llama_stack.apis.safety import Safety
-from llama_stack.apis.scoring import Scoring
-from llama_stack.apis.scoring_functions import ScoringFunctions
-from llama_stack.apis.shields import Shields
-from llama_stack.apis.tools import ToolGroups, ToolRuntime
-from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
 from llama_stack.core.distribution import get_provider_registry
@@ -54,7 +55,6 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.store.registry import create_dist_registry
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import Api
 logger = get_logger(name=__name__, category="core")

View file

@@ -16,6 +16,7 @@ from typing import (
     cast,
 )
+from llama_stack_api.schema_utils import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@@ -28,7 +29,6 @@ from pydantic import BaseModel, Field
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
-from llama_stack.schema_utils import json_schema_type, register_schema
 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]