feat: split API and provider specs into separate llama-stack-api pkg

Extract API definitions, models, and provider specifications into a
standalone llama-stack-api package that can be published to PyPI
independently of the main llama-stack server.

Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.

Changes

- Created llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved APIs, providers datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.*
- Preserved git history using git mv for moved files
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
- Rebased on top of upstream src/ layout changes

Testing

Package builds successfully and can be imported independently.
All pre-commit hooks pass with expected exclusions maintained.

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests

Signed-off-by: Charlie Doern <cdoern@redhat.com>
This commit is contained in:
Charlie Doern 2025-10-30 12:25:23 -04:00
parent e5a55f3677
commit 85d407c2a0
359 changed files with 1259 additions and 980 deletions

View file

@ -9,9 +9,9 @@ from unittest.mock import AsyncMock, MagicMock, patch
import numpy as np
import pytest
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter

View file

@ -9,11 +9,11 @@ from unittest.mock import MagicMock, patch
import numpy as np
import pytest
from llama_stack_api.apis.files import Files
from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack_api.providers.datatypes import HealthStatus
from llama_stack.apis.files import Files
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack.apis.vector_stores import VectorStore
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import (
FaissIndex,

View file

@ -8,8 +8,8 @@ import asyncio
import numpy as np
import pytest
from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
SQLiteVecIndex,
SQLiteVecVectorIOAdapter,

View file

@ -10,9 +10,8 @@ from unittest.mock import AsyncMock, patch
import numpy as np
import pytest
from llama_stack.apis.common.errors import VectorStoreNotFoundError
from llama_stack.apis.vector_io import (
from llama_stack_api.apis.common.errors import VectorStoreNotFoundError
from llama_stack_api.apis.vector_io import (
Chunk,
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
OpenAICreateVectorStoreRequestWithExtraBody,
@ -20,7 +19,8 @@ from llama_stack.apis.vector_io import (
VectorStoreChunkingStrategyAuto,
VectorStoreFileObject,
)
from llama_stack.apis.vector_stores import VectorStore
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
# This test is a unit test for the inline VectorIO providers. This should only contain
@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
"""Ensure no KeyError when document_id is missing or in different places."""
from llama_stack.apis.vector_io import Chunk, ChunkMetadata
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
fake_index = AsyncMock()
vector_io_adapter.cache["db1"] = fake_index
@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
async def test_document_id_with_invalid_type_raises_error():
"""Ensure TypeError is raised when document_id is not a string."""
from llama_stack.apis.vector_io import Chunk
from llama_stack_api.apis.vector_io import Chunk
# Integer document_id should raise TypeError
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

View file

@ -4,7 +4,8 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.vector_io import Chunk, ChunkMetadata
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
# This test is a unit test for the chunk_utils.py helpers. This should only contain