feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


see: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.
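
As a concrete illustration (hypothetical snippet, not code from this PR), an external provider can depend on the type definitions alone:

```python
# Hypothetical external-provider module. With the split, this import is
# served by the lightweight llama-stack-api package (pydantic + jsonschema
# only) instead of overriding the installed llama-stack server package.
from llama_stack_api import Api

# Api enumerates the Llama Stack API groups a provider can implement.
print(Api.inference)
```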

## Changes

- Created llama-stack-api package with minimal dependencies (pydantic,
jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.* (see the
example after this list)
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
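
The import migration looks like this in a typical module (symbol names taken from the diffs shown later in this commit):

```python
# Before: types were scattered across llama_stack.* modules, which
# requires the full llama-stack server package to be installed.
# from llama_stack.apis.datasetio import DatasetIO
# from llama_stack.apis.datasets import DatasetPurpose, DataSource
# from llama_stack.providers.datatypes import RoutingTable

# After: the same types come from the standalone package's namespace.
from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, RoutingTable
```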

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Precursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the API package, limiting the scope of
the changes here.


Relates to #3237.

## Test Plan

Package builds successfully and can be imported independently. All
pre-commit hooks pass with the expected exclusions maintained.
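
A minimal independence check along these lines (illustrative sketch, not a test committed in this PR):

```python
# Import the standalone package in an environment without llama-stack
# installed; a failure here would indicate a lingering server dependency.
import llama_stack_api
from llama_stack_api import Api  # spot-check a re-exported symbol

print(llama_stack_api.__file__)  # confirm which installation resolved
```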

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>

358 changed files, with 2337 additions and 1424 deletions. Representative hunks:

```diff
@@ -6,13 +6,14 @@
 from typing import Any
+from llama_stack_api import Api, RoutingTable
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
 )
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
-from llama_stack.providers.datatypes import Api, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
```

```diff
@@ -6,11 +6,9 @@
 from typing import Any
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import DatasetPurpose, DataSource
+from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")
```

```diff
@@ -6,15 +6,19 @@
 from typing import Any
-from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    BenchmarkConfig,
+    Eval,
+    EvaluateResponse,
+    Job,
+    RoutingTable,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
     ScoringFnParams,
 )
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")
```

```diff
@@ -11,17 +11,19 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Body
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
+    Inference,
     ListOpenAIChatCompletionResponse,
+    ModelNotFoundError,
+    ModelType,
+    ModelTypeError,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIChatCompletionToolCall,
     OpenAIChatCompletionToolCallFunction,
@@ -35,18 +37,17 @@ from llama_stack.apis.inference import (
     OpenAIMessageParam,
     Order,
     RerankResponse,
+    RoutingTable,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import ModelType
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
 from llama_stack.core.telemetry.telemetry import MetricEvent
 from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 logger = get_logger(name=__name__, category="core::routers")
```

```diff
@@ -6,13 +6,10 @@
 from typing import Any
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import RunShieldResponse, Safety
-from llama_stack.apis.safety.safety import ModerationObject
-from llama_stack.apis.shields import Shield
+from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import RoutingTable
 logger = get_logger(name=__name__, category="core::routers")
```

```diff
@@ -6,13 +6,12 @@
 from typing import Any
-from llama_stack.apis.common.content_types import (
+from llama_stack_api import (
     URL,
-)
-from llama_stack.apis.tools import (
     ListToolDefsResponse,
     ToolRuntime,
 )
 from llama_stack.log import get_logger
 from ..routing_tables.toolgroups import ToolGroupsRoutingTable
```

```diff
@@ -9,14 +9,16 @@ import uuid
 from typing import Annotated, Any
 from fastapi import Body
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.models import ModelType
-from llama_stack.apis.vector_io import (
+from llama_stack_api import (
     Chunk,
+    HealthResponse,
+    HealthStatus,
+    InterleavedContent,
+    ModelType,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
+    RoutingTable,
     SearchRankingOptions,
     VectorIO,
     VectorStoreChunkingStrategy,
@@ -33,9 +35,9 @@ from llama_stack.apis.vector_io import (
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
 from llama_stack.core.datatypes import VectorStoresConfig
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
 logger = get_logger(name=__name__, category="core::routers")
```