feat: split API and provider specs into separate llama-stack-api pkg (#3895)

# What does this PR do?

Extract API definitions and provider specifications into a standalone
llama-stack-api package that can be published to PyPI independently of
the main llama-stack server.


see: https://github.com/llamastack/llama-stack/pull/2978 and
https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

## Motivation

External providers currently import from llama-stack, which overrides
the installed version and causes dependency conflicts. This separation
allows external providers to:

- Install only the type definitions they need without server
dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This enables us to re-enable external provider module tests that were
previously blocked by these import conflicts.
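
To make this concrete, here is a hedged sketch of what an external provider's imports look like before and after the split (the symbols `Api`, `Inference`, and `ProviderSpec` are taken from the diffs below; the exact set a given provider needs will vary):

```python
# Before the split: types came from the llama-stack server package,
# pulling in its full dependency tree.
# from llama_stack.apis.inference import Inference
# from llama_stack.providers.datatypes import Api, ProviderSpec

# After the split: the same types come from the lightweight API package.
from llama_stack_api import Api, Inference, ProviderSpec
```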

## Changes

- Created the llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved the APIs, provider datatypes, strong_typing, and schema_utils modules
- Updated all imports from `llama_stack.*` to `llama_stack_api.*`
- Configured a local editable install for the development workflow
- Updated linting and type-checking configuration for both packages

## Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable external provider module tests


Precursor PRs to this one:

- #4093 
- #3954 
- #4064 

These PRs moved key pieces _out_ of the API package, limiting the scope of
changes here.


Relates to #3237

## Test Plan

Package builds successfully and can be imported independently. All
pre-commit hooks pass with expected exclusions maintained.
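
As a rough illustration, the independent-import check can be as small as the following (a sketch assuming llama-stack-api is installed on its own, e.g. via the local editable install; the exact commands used for this PR are not shown here):

```python
# Hedged sketch: verify the new package imports without the llama-stack
# server package. The symbols are ones the diffs in this PR re-export.
import llama_stack_api
from llama_stack_api import Api, ProviderSpec, json_schema_type

assert hasattr(llama_stack_api, "ProviderSpec")
print("llama-stack-api imports independently")
```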

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>

@@ -6,10 +6,8 @@
 from typing import Any
 from urllib.parse import parse_qs, urlparse
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset
-from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records

@@ -7,11 +7,7 @@
 from typing import Any
 import aiohttp
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
 from .config import NvidiaDatasetIOConfig

@@ -6,18 +6,24 @@
 from typing import Any
 import requests
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
+    Inference,
+    Job,
+    JobStatus,
+    Scoring,
+    ScoringResult,
+)
-from llama_stack.apis.agents import Agents
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import Scoring, ScoringResult
-from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-from .....apis.common.job_types import Job, JobStatus
-from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
 from .config import NVIDIAEvalConfig
 DEFAULT_NAMESPACE = "nvidia"

@@ -8,17 +8,17 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 from fastapi import Depends, File, Form, Response, UploadFile
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.providers.utils.files.form_data import parse_expires_after
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType

@@ -17,16 +17,17 @@ from fastapi import Depends, File, Form, Response, UploadFile
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.providers.utils.files.form_data import parse_expires_after

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AnthropicProviderDataValidator(BaseModel):

@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class AzureProviderDataValidator(BaseModel):

@@ -6,9 +6,7 @@
 from collections.abc import AsyncIterator, Iterable
-from openai import AuthenticationError
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from openai import AuthenticationError
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,11 @@
 from urllib.parse import urljoin
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import CerebrasImplConfig

@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 DEFAULT_BASE_URL = "https://api.cerebras.ai"

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class DatabricksProviderDataValidator(BaseModel):

@@ -7,8 +7,8 @@
 from collections.abc import Iterable
 from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GeminiProviderDataValidator(BaseModel):

@@ -6,12 +6,13 @@
 from typing import Any
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import GeminiConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class GroqProviderDataValidator(BaseModel):

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class LlamaProviderDataValidator(BaseModel):

@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference
 from .config import NVIDIAConfig

@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class NVIDIAProviderDataValidator(BaseModel):

@@ -8,16 +8,15 @@
 from collections.abc import Iterable
 import aiohttp
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider
 from .config import OCIConfig

@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OCIProviderDataValidator(BaseModel):

@@ -10,15 +10,15 @@ from typing import Any
 import httpx
 import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig

@@ -7,15 +7,15 @@
 import asyncio
-from ollama import AsyncClient as AsyncOllamaClient
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
+from ollama import AsyncClient as AsyncOllamaClient
+from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class OpenAIProviderDataValidator(BaseModel):

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -6,10 +6,9 @@
 from collections.abc import AsyncIterator
-from openai import AsyncOpenAI
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from .config import PassthroughImplConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class RunpodProviderDataValidator(BaseModel):

@@ -6,11 +6,12 @@
 from collections.abc import AsyncIterator
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import RunpodImplConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class SambaNovaProviderDataValidator(BaseModel):

@@ -5,10 +5,10 @@
 # the root directory of this source tree.
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -8,12 +8,12 @@
 from collections.abc import Iterable
 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from pydantic import SecretStr
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -8,15 +8,15 @@
 from collections.abc import Iterable
 from typing import Any, cast
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class VertexAIProviderDataValidator(BaseModel):

@@ -6,10 +6,10 @@
 from pathlib import Path
+from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -7,19 +7,17 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 import httpx
-from pydantic import ConfigDict
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
+from pydantic import ConfigDict
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from .config import VLLMInferenceAdapterConfig

@@ -7,10 +7,10 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 class WatsonXProviderDataValidator(BaseModel):

@@ -9,8 +9,9 @@ from typing import Any
 import litellm
 import requests
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
@@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
         # Convert response to OpenAI format
-        from llama_stack.apis.inference import OpenAIEmbeddingUsage
+        from llama_stack_api import OpenAIEmbeddingUsage
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)

@@ -128,7 +128,7 @@ client.post_training.job.cancel(job_uuid="your-job-id")
 #### 1. Register the model
 ```python
-from llama_stack.apis.models import Model, ModelType
+from llama_stack_api.models import Model, ModelType
 client.models.register(
     model_id="test-example-model@v1",

@@ -8,9 +8,7 @@ from datetime import datetime
 from typing import Any, Literal
 import aiohttp
-from pydantic import BaseModel, ConfigDict
-from llama_stack.apis.post_training import (
+from llama_stack_api import (
     AlgorithmConfig,
     DPOAlignmentConfig,
     JobStatus,
@@ -19,6 +17,8 @@ from llama_stack.apis.post_training import (
     PostTrainingJobStatusResponse,
     TrainingConfig,
 )
+from pydantic import BaseModel, ConfigDict
 from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
 from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper

@@ -7,9 +7,9 @@
 import warnings
 from typing import Any
+from llama_stack_api import TrainingConfig
 from pydantic import BaseModel
-from llama_stack.apis.post_training import TrainingConfig
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig

@@ -7,16 +7,17 @@
 import json
 from typing import Any
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack_api import (
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ViolationLevel,
 )
-from llama_stack.apis.shields import Shield
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from .config import BedrockSafetyConfig

@@ -5,8 +5,9 @@
 # the root directory of this source tree.
+from llama_stack_api import json_schema_type
 from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -42,8 +42,8 @@ client.initialize()
 #### Create a safety shield
 ```python
-from llama_stack.apis.safety import Shield
-from llama_stack.apis.inference import Message
+from llama_stack_api.safety import Shield
+from llama_stack_api.inference import Message
 # Create a safety shield
 shield = Shield(

@@ -6,10 +6,9 @@
 import os
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type
 class NVIDIASafetyConfig(BaseModel):

@@ -7,12 +7,18 @@
 from typing import Any
 import requests
+from llama_stack_api import (
+    ModerationObject,
+    OpenAIMessageParam,
+    RunShieldResponse,
+    Safety,
+    SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
+    ViolationLevel,
+)
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel
-from llama_stack.apis.shields import Shield
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 from .config import NVIDIASafetyConfig

@@ -6,10 +6,9 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
-from llama_stack.schema_utils import json_schema_type
 class SambaNovaProviderDataValidator(BaseModel):
     sambanova_api_key: str | None = Field(

@@ -8,18 +8,18 @@ from typing import Any
 import litellm
 import requests
-from llama_stack.apis.inference import OpenAIMessageParam
-from llama_stack.apis.safety import (
+from llama_stack_api import (
+    OpenAIMessageParam,
     RunShieldResponse,
     Safety,
     SafetyViolation,
+    Shield,
+    ShieldsProtocolPrivate,
     ViolationLevel,
 )
-from llama_stack.apis.shields import Shield
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ShieldsProtocolPrivate
 from .config import SambaNovaSafetyConfig

@@ -8,17 +8,17 @@ import json
 from typing import Any
 import httpx
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
 from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from .config import BingSearchToolConfig

@@ -7,18 +7,18 @@
 from typing import Any
 import httpx
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.models.llama.datatypes import BuiltinTool
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from .config import BraveSearchToolConfig

@@ -7,17 +7,18 @@
 from typing import Any
 from urllib.parse import urlparse
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.tools import (
+from llama_stack_api import (
+    URL,
+    Api,
     ListToolDefsResponse,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
 from .config import MCPProviderConfig

@@ -8,17 +8,17 @@ import json
 from typing import Any
 import httpx
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
 from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from .config import TavilySearchToolConfig

@@ -8,17 +8,17 @@ import json
 from typing import Any
 import httpx
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.tools import (
+from llama_stack_api import (
+    URL,
     ListToolDefsResponse,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
 )
 from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from .config import WolframAlphaToolConfig

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 from .config import ChromaVectorIOConfig

@@ -9,14 +9,19 @@ from typing import Any
 from urllib.parse import urlparse
 import chromadb
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 from .config import MilvusVectorIOConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, ConfigDict, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -8,16 +8,21 @@ import asyncio
 import os
 from typing import Any
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 from .config import PGVectorVectorIOConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -8,18 +8,23 @@ import heapq
 from typing import Any
 import psycopg2
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 from psycopg2 import sql
 from psycopg2.extras import Json, execute_values
 from pydantic import BaseModel, TypeAdapter
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 from .config import QdrantVectorIOConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -9,23 +9,24 @@ import hashlib
 import uuid
 from typing import Any
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreChunkingStrategy,
+    VectorStoreFileObject,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 from qdrant_client import AsyncQdrantClient, models
 from qdrant_client.models import PointStruct
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import (
-    Chunk,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStoreChunkingStrategy,
-    VectorStoreFileObject,
-)
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.datatypes import Api, ProviderSpec
+from llama_stack_api import Api, ProviderSpec
 from .config import WeaviateVectorIOConfig

@@ -6,10 +6,10 @@
 from typing import Any
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 @json_schema_type

@@ -8,19 +8,23 @@ from typing import Any
 import weaviate
 import weaviate.classes as wvc
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 from weaviate.classes.init import Auth
 from weaviate.classes.query import Filter, HybridFusion
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin