feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


see: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.

Changes

- Created llama-stack-api package with minimal dependencies (pydantic,
jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Pre-cursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the Api pkg, limiting the scope of
change here.


relates to #3237 

## Test Plan

Package builds successfully and can be imported independently. All
pre-commit hooks pass with expected exclusions maintained.

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
Charlie Doern 2025-11-13 14:51:17 -05:00 committed by GitHub
parent ceb716b9a0
commit 840ad75fe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
358 changed files with 2337 additions and 1424 deletions

View file

@ -31,6 +31,7 @@ dependencies = [
"httpx",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-api", # API and provider specifications (local dev via tool.uv.sources)
"openai>=2.5.0",
"prompt-toolkit",
"python-dotenv",
@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p
[tool.setuptools.packages.find]
where = ["src"]
include = ["llama_stack", "llama_stack.*"]
include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]
[[tool.uv.index]]
name = "pytorch-cpu"
@ -190,6 +191,7 @@ explicit = true
[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]
[tool.ruff]
line-length = 120
@ -256,8 +258,8 @@ unfixable = [
] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
[tool.mypy]
mypy_path = ["src"]
packages = ["llama_stack"]
mypy_path = ["src", "src/llama-stack-api"]
packages = ["llama_stack", "llama_stack_api"]
plugins = ['pydantic.mypy']
disable_error_code = []
warn_return_any = true
@ -279,15 +281,18 @@ exclude = [
"^src/llama_stack/core/store/registry\\.py$",
"^src/llama_stack/core/utils/exec\\.py$",
"^src/llama_stack/core/utils/prompt_for_config\\.py$",
# Moved to llama-stack-api but still excluded
"^src/llama_stack/models/llama/llama3/interface\\.py$",
"^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^src/llama_stack/models/llama/llama3/generation\\.py$",
"^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
"^src/llama_stack/models/llama/llama4/",
"^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
"^src/llama_stack/providers/inline/agents/meta_reference/",
"^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
"^src/llama_stack/providers/inline/safety/code_scanner/",
@ -337,7 +342,9 @@ exclude = [
"^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
"^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
"^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
"^src/llama_stack/strong_typing/auxiliary\\.py$",
"^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
"^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
"^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
"^src/llama_stack/distributions/template\\.py$",
]