feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do? Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server. See https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942. Motivation: External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to: - Install only the type definitions they need without server dependencies - Avoid version conflicts with the installed llama-stack package - Be versioned and released independently. This enables us to re-enable external provider module tests that were previously blocked by these import conflicts. Changes: - Created llama-stack-api package with minimal dependencies (pydantic, jsonschema) - Moved APIs, providers datatypes, strong_typing, and schema_utils - Updated all imports from llama_stack.* to llama_stack_api.* - Configured local editable install for development workflow - Updated linting and type-checking configuration for both packages. Next Steps: - Publish llama-stack-api to PyPI - Update external provider dependencies - Re-enable external provider module tests. Precursor PRs to this one: - #4093 - #3954 - #4064. These PRs moved key pieces _out_ of the Api pkg, limiting the scope of change here. Relates to #3237. ## Test Plan: Package builds successfully and can be imported independently. All pre-commit hooks pass with expected exclusions maintained. --------- Signed-off-by: Charlie Doern <cdoern@redhat.com>
2025-12-03 09:53:45 +00:00 · 2025-11-13 14:51:17 -05:00 · 2025-11-13 14:51:17 -05:00 · 840ad75fe9
commit 840ad75fe9
parent ceb716b9a0
358 changed files with 2337 additions and 1424 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -31,6 +31,7 @@ dependencies = [
    "httpx",
    "jinja2>=3.1.6",
    "jsonschema",
+    "llama-stack-api",  # API and provider specifications (local dev via tool.uv.sources)
    "openai>=2.5.0",
    "prompt-toolkit",
    "python-dotenv",
@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p

 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*"]
+include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]

 [[tool.uv.index]]
 name = "pytorch-cpu"
@ -190,6 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
+llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]

 [tool.ruff]
 line-length = 120
@ -256,8 +258,8 @@ unfixable = [
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API

 [tool.mypy]
-mypy_path = ["src"]
-packages = ["llama_stack"]
+mypy_path = ["src", "src/llama-stack-api"]
+packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
 warn_return_any = true
@ -279,15 +281,18 @@ exclude = [
    "^src/llama_stack/core/store/registry\\.py$",
    "^src/llama_stack/core/utils/exec\\.py$",
    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    # NOTE: entries in this list under src/llama-stack-api/ were moved to llama-stack-api but are still excluded
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^src/llama_stack/providers/inline/datasetio/localfs/",
-    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/models/llama/llama3/generation\\.py$",
    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^src/llama_stack/models/llama/llama4/",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
+    "^src/llama_stack/providers/inline/agents/meta_reference/",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^src/llama_stack/providers/inline/safety/code_scanner/",
@ -337,7 +342,9 @@ exclude = [
    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
+    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
+    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
    "^src/llama_stack/distributions/template\\.py$",
 ]