feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


See: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.
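As a sketch of what this enables (the adapter class below is hypothetical; Inference and OpenAIChatCompletionRequestWithExtraBody are real symbols that the diffs in this PR re-export from llama_stack_api), an external provider can be typed against the API package alone:

```python
# Hypothetical third-party provider module that depends only on
# llama-stack-api, not on the llama-stack server package.
from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody


class MyInferenceAdapter(Inference):
    """Illustrative adapter typed against the standalone API package."""
```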

## Changes

- Created llama-stack-api package with minimal dependencies (pydantic,
jsonschema)
- Moved the APIs, provider datatypes, strong_typing, and schema_utils modules
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured local editable install for development workflow
- Updated linting and type-checking configuration for both packages
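For example, a representative before/after for the import migration (assembled from the hunks below, not an exhaustive mapping):

```python
# Before: symbols spread across several server-package modules
# from llama_stack.apis.inference import OpenAIChatCompletion
# from llama_stack.apis.models import Model, ModelType
# from llama_stack.schema_utils import json_schema_type

# After: a single flat namespace from the standalone package
from llama_stack_api import Model, ModelType, OpenAIChatCompletion, json_schema_type
```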

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Precursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the API package, limiting the scope of
the changes here.


Relates to #3237.

## Test Plan

The package builds successfully and can be imported independently. All
pre-commit hooks pass, with the expected exclusions maintained.
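A minimal check along these lines (the exact verification commands are not recorded here, so treat this as a sketch to run in a clean environment with only llama-stack-api installed):

```python
# Import the standalone package and a couple of the symbols the diffs
# below re-export from it; no llama-stack server install is required.
import llama_stack_api
from llama_stack_api import Inference, json_schema_type

print("llama_stack_api imports cleanly:", llama_stack_api.__name__)
```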

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
358 changed files with 2337 additions and 1424 deletions. The hunks below show the import migration applied across the remote inference providers:

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AnthropicProviderDataValidator(BaseModel):
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AzureProviderDataValidator(BaseModel):
```

```diff
@@ -6,9 +6,7 @@
 from collections.abc import AsyncIterator, Iterable
-from openai import AuthenticationError
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from openai import AuthenticationError
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,11 @@
 from urllib.parse import urljoin
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import CerebrasImplConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class DatabricksProviderDataValidator(BaseModel):
```

```diff
@@ -7,8 +7,8 @@
 from collections.abc import Iterable
 from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GeminiProviderDataValidator(BaseModel):
```

```diff
@@ -6,12 +6,13 @@
 from typing import Any
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import GeminiConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GroqProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class LlamaProviderDataValidator(BaseModel):
```

```diff
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference
 from .config import NVIDIAConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class NVIDIAProviderDataValidator(BaseModel):
```

```diff
@@ -8,16 +8,15 @@
 from collections.abc import Iterable
 import aiohttp
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider
 from .config import OCIConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OCIProviderDataValidator(BaseModel):
```

```diff
@@ -10,15 +10,15 @@ from typing import Any
 import httpx
 import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
```

```diff
@@ -7,15 +7,15 @@
 import asyncio
-from ollama import AsyncClient as AsyncOllamaClient
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
+from ollama import AsyncClient as AsyncOllamaClient
+from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OpenAIProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -6,10 +6,9 @@
 from collections.abc import AsyncIterator
-from openai import AsyncOpenAI
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from .config import PassthroughImplConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class RunpodProviderDataValidator(BaseModel):
```

```diff
@@ -6,11 +6,12 @@
 from collections.abc import AsyncIterator
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import RunpodImplConfig
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class SambaNovaProviderDataValidator(BaseModel):
```

```diff
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -8,12 +8,12 @@
 from collections.abc import Iterable
 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from pydantic import SecretStr
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -8,15 +8,15 @@
 from collections.abc import Iterable
 from typing import Any, cast
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
```

```diff
@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class VertexAIProviderDataValidator(BaseModel):
```

```diff
@@ -6,10 +6,10 @@
 from pathlib import Path
+from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
```

```diff
@@ -7,19 +7,17 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 import httpx
-from pydantic import ConfigDict
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
+from pydantic import ConfigDict
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import VLLMInferenceAdapterConfig
```

```diff
@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class WatsonXProviderDataValidator(BaseModel):
```

```diff
@@ -9,8 +9,9 @@ from typing import Any
 import litellm
 import requests
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
@@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
         # Convert response to OpenAI format
-        from llama_stack.apis.inference import OpenAIEmbeddingUsage
+        from llama_stack_api import OpenAIEmbeddingUsage
+
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
```