diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index b0f2c6e69..b58f4eb69 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -31,7 +31,7 @@ jobs:
           version: 0.7.6
 
       - name: Build Llama Stack API package
-        working-directory: src/llama-stack-api
+        working-directory: src/llama_stack_api
         run: uv build
 
       - name: Build Llama Stack package
@@ -39,7 +39,7 @@ jobs:
 
       - name: Install Llama Stack package (with api stubs from local build)
        run: |
-          uv pip install --find-links src/llama-stack-api/dist dist/*.whl
+          uv pip install --find-links src/llama_stack_api/dist dist/*.whl
 
      - name: Verify Llama Stack package
        run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6f4dd6a0e..6e32d16b7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
   hooks:
     - id: ruff
       args: [ --fix ]
-      exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
+      exclude: ^(src/llama_stack/strong_typing/.*|src/llama_stack_api/strong_typing/.*)$
     - id: ruff-format
 
 - repo: https://github.com/adamchainz/blacken-docs
diff --git a/pyproject.toml b/pyproject.toml
index d287b4be7..34728d6ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -181,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p
 
 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["llama_stack", "llama_stack.*", "llama-stack-api", "llama-stack-api.*"]
+include = ["llama_stack", "llama_stack.*", "llama_stack_api", "llama_stack_api.*"]
 
 [[tool.uv.index]]
 name = "pytorch-cpu"
@@ -191,7 +191,7 @@ explicit = true
 [tool.uv.sources]
 torch = [{ index = "pytorch-cpu" }]
 torchvision = [{ index = "pytorch-cpu" }]
-llama-stack-api = [{ path = "src/llama-stack-api", editable = true }]
+llama-stack-api = [{ path = "src/llama_stack_api", editable = true }]
 
 [tool.ruff]
 line-length = 120
@@ -258,7 +258,7 @@ unfixable = [
 ]
 # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 
 [tool.mypy]
-mypy_path = ["src", "src/llama-stack-api"]
+mypy_path = ["src"]
 packages = ["llama_stack", "llama_stack_api"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
@@ -281,14 +281,12 @@ exclude = [
     "^src/llama_stack/core/store/registry\\.py$",
     "^src/llama_stack/core/utils/exec\\.py$",
     "^src/llama_stack/core/utils/prompt_for_config\\.py$",
-    # Moved to llama-stack-api but still excluded
     "^src/llama_stack/models/llama/llama3/interface\\.py$",
     "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
     "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
     "^src/llama_stack/models/llama/llama3/generation\\.py$",
     "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
     "^src/llama_stack/models/llama/llama4/",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$",
     "^src/llama_stack/providers/inline/agents/meta_reference/",
     "^src/llama_stack/providers/inline/datasetio/localfs/",
     "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
@@ -342,9 +340,7 @@ exclude = [
     "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
     "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
     "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$",
-    "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$",
-    "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$",
+    "^src/llama_stack_api/strong_typing/auxiliary\\.py$",
     "^src/llama_stack/distributions/template\\.py$",
 ]
diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py
index 8099a3f0d..381bbc6a7 100755
--- a/scripts/generate_prompt_format.py
+++ b/scripts/generate_prompt_format.py
@@ -14,11 +14,11 @@
 import os
 from pathlib import Path
 
 import fire
-from llama_stack_api import ModelNotFoundError
 
 from llama_stack.models.llama.llama3.generation import Llama3
 from llama_stack.models.llama.llama4.generation import Llama4
 from llama_stack.models.llama.sku_list import resolve_model
+from llama_stack_api import ModelNotFoundError
 
 THIS_DIR = Path(__file__).parent.resolve()
diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py
index 50fe394fc..82bef1a4f 100644
--- a/src/llama_stack/cli/stack/_list_deps.py
+++ b/src/llama_stack/cli/stack/_list_deps.py
@@ -9,7 +9,6 @@ import sys
 from pathlib import Path
 
 import yaml
-from llama_stack_api import Api
 from termcolor import cprint
 
 from llama_stack.cli.stack.utils import ImageType
@@ -22,6 +21,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import replace_env_vars
 from llama_stack.log import get_logger
+from llama_stack_api import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py
index 0a4e22b09..d49b142e0 100644
--- a/src/llama_stack/cli/stack/utils.py
+++ b/src/llama_stack/cli/stack/utils.py
@@ -11,7 +11,6 @@ from functools import lru_cache
 from pathlib import Path
 
 import yaml
-from llama_stack_api import Api
 from termcolor import cprint
 
 from llama_stack.core.datatypes import (
@@ -33,6 +32,7 @@ from llama_stack.core.storage.datatypes import (
 from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.image_types import LlamaStackImageType
+from llama_stack_api import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions"
diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py
index 27ded7ede..630b2a47f 100644
--- a/src/llama_stack/core/build.py
+++ b/src/llama_stack/core/build.py
@@ -6,7 +6,6 @@
 
 import sys
 
-from llama_stack_api import Api
 from pydantic import BaseModel
 from termcolor import cprint
 
@@ -14,6 +13,7 @@ from llama_stack.core.datatypes import BuildConfig
 from llama_stack.core.distribution import get_provider_registry
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
+from llama_stack_api import Api
 
 log = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py
index 41acacdb5..ba935a35e 100644
--- a/src/llama_stack/core/client.py
+++ b/src/llama_stack/core/client.py
@@ -12,10 +12,11 @@ from enum import Enum
 from typing import Any, Union, get_args, get_origin
 
 import httpx
-from llama_stack_api import RemoteProviderConfig
 from pydantic import BaseModel, parse_obj_as
 from termcolor import cprint
 
+from llama_stack_api import RemoteProviderConfig
+
 _CLIENT_CLASSES = {}
diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py
index bdb3b9734..d738b8a61 100644
--- a/src/llama_stack/core/configure.py
+++ b/src/llama_stack/core/configure.py
@@ -6,8 +6,6 @@
 import textwrap
 from typing import Any
 
-from llama_stack_api import Api, ProviderSpec
-
 from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     DistributionSpec,
@@ -22,6 +20,7 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ProviderSpec
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py
index b94cd4fdd..4cf5a82ee 100644
--- a/src/llama_stack/core/conversations/conversations.py
+++ b/src/llama_stack/core/conversations/conversations.py
@@ -8,6 +8,13 @@ import secrets
 import time
 from typing import Any, Literal
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.core.datatypes import AccessRule, StackRunConfig
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     Conversation,
     ConversationDeletedResource,
@@ -18,13 +25,6 @@ from llama_stack_api import (
     Conversations,
     Metadata,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.core.datatypes import AccessRule, StackRunConfig
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 
 logger = get_logger(name=__name__, category="openai_conversations")
diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
index 4231363b6..1e29690ff 100644
--- a/src/llama_stack/core/datatypes.py
+++ b/src/llama_stack/core/datatypes.py
@@ -9,6 +9,15 @@ from pathlib import Path
 from typing import Annotated, Any, Literal, Self
 from urllib.parse import urlparse
 
+from pydantic import BaseModel, Field, field_validator, model_validator
+
+from llama_stack.core.access_control.datatypes import AccessRule
+from llama_stack.core.storage.datatypes import (
+    KVStoreReference,
+    StorageBackendType,
+    StorageConfig,
+)
+from llama_stack.log import LoggingConfig
 from llama_stack_api import (
     Api,
     Benchmark,
@@ -35,15 +44,6 @@ from llama_stack_api import (
     VectorStore,
     VectorStoreInput,
 )
-from pydantic import BaseModel, Field, field_validator, model_validator
-
-from llama_stack.core.access_control.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import (
-    KVStoreReference,
-    StorageBackendType,
-    StorageConfig,
-)
-from llama_stack.log import LoggingConfig
 
 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
 LLAMA_STACK_RUN_CONFIG_VERSION = 2
diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py
index 162f9f2b0..658c75ef2 100644
--- a/src/llama_stack/core/distribution.py
+++ b/src/llama_stack/core/distribution.py
@@ -10,17 +10,17 @@ import os
 from typing import Any
 
 import yaml
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.external import load_external_apis
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
     RemoteProviderSpec,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import BuildConfig, DistributionSpec
-from llama_stack.core.external import load_external_apis
-from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py
index ce0c7eb72..d1a2d6e42 100644
--- a/src/llama_stack/core/external.py
+++ b/src/llama_stack/core/external.py
@@ -6,10 +6,10 @@
 
 import yaml
 
-from llama_stack_api import Api, ExternalApiSpec
 from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
+from llama_stack_api import Api, ExternalApiSpec
 
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
index 53ddd3475..272c9d1bc 100644
--- a/src/llama_stack/core/inspect.py
+++ b/src/llama_stack/core/inspect.py
@@ -6,6 +6,11 @@
 
 from importlib.metadata import version
 
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack_api import (
     HealthInfo,
     HealthStatus,
@@ -14,11 +19,6 @@ from llama_stack_api import (
     RouteInfo,
     VersionInfo,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.server.routes import get_all_api_routes
 
 
 class DistributionInspectConfig(BaseModel):
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index 959284720..2a224d915 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
+
 from llama_stack_api import is_unwrapped_body_param
 
 try:
diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
index d9532b978..9f532c1cd 100644
--- a/src/llama_stack/core/prompts/prompts.py
+++ b/src/llama_stack/core/prompts/prompts.py
@@ -7,11 +7,11 @@
 import json
 from typing import Any
 
-from llama_stack_api import ListPromptsResponse, Prompt, Prompts
 from pydantic import BaseModel
 
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
+from llama_stack_api import ListPromptsResponse, Prompt, Prompts
 
 
 class PromptServiceConfig(BaseModel):
diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
index 7337d9e35..e3fe3c7b3 100644
--- a/src/llama_stack/core/providers.py
+++ b/src/llama_stack/core/providers.py
@@ -7,10 +7,10 @@
 import asyncio
 from typing import Any
 
-from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
+from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers
 
 from .datatypes import StackRunConfig
 from .utils.config import redact_sensitive_fields
diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
index ca154fbc6..6bc32c2d0 100644
--- a/src/llama_stack/core/resolver.py
+++ b/src/llama_stack/core/resolver.py
@@ -8,6 +8,19 @@ import importlib.metadata
 import inspect
 from typing import Any
 
+from llama_stack.core.client import get_client_impl
+from llama_stack.core.datatypes import (
+    AccessRule,
+    AutoRoutedProviderSpec,
+    Provider,
+    RoutingTableProviderSpec,
+    StackRunConfig,
+)
+from llama_stack.core.distribution import builtin_automatically_routed_apis
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.store import DistributionRegistry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     LLAMA_STACK_API_V1ALPHA,
     Agents,
@@ -48,20 +61,6 @@ from llama_stack_api import (
     Providers as ProvidersAPI,
 )
 
-from llama_stack.core.client import get_client_impl
-from llama_stack.core.datatypes import (
-    AccessRule,
-    AutoRoutedProviderSpec,
-    Provider,
-    RoutingTableProviderSpec,
-    StackRunConfig,
-)
-from llama_stack.core.distribution import builtin_automatically_routed_apis
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.store import DistributionRegistry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
index c2d051422..289755bcb 100644
--- a/src/llama_stack/core/routers/__init__.py
+++ b/src/llama_stack/core/routers/__init__.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, RoutingTable
-
 from llama_stack.core.datatypes import (
     AccessRule,
     RoutedProtocol,
@@ -15,6 +13,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.stack import StackRunConfig
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack_api import Api, RoutingTable
 
 
 async def get_routing_table_impl(
diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
index dcf247874..b6a5f3b96 100644
--- a/src/llama_stack/core/routers/datasets.py
+++ b/src/llama_stack/core/routers/datasets.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
-
 from llama_stack.log import get_logger
+from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
index cbbbf5cc5..4d7269180 100644
--- a/src/llama_stack/core/routers/eval_scoring.py
+++ b/src/llama_stack/core/routers/eval_scoring.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     BenchmarkConfig,
     Eval,
@@ -18,8 +19,6 @@ from llama_stack_api import (
     ScoringFnParams,
 )
 
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
index a538ab02e..acfe20399 100644
--- a/src/llama_stack/core/routers/inference.py
+++ b/src/llama_stack/core/routers/inference.py
@@ -11,6 +11,16 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Body
+from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
+from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
+from pydantic import TypeAdapter
+
+from llama_stack.core.telemetry.telemetry import MetricEvent
+from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
+from llama_stack.log import get_logger
+from llama_stack.models.llama.llama3.chat_format import ChatFormat
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
@@ -39,16 +49,6 @@ from llama_stack_api import (
     RerankResponse,
     RoutingTable,
 )
-from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam
-from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
-from pydantic import TypeAdapter
-
-from llama_stack.core.telemetry.telemetry import MetricEvent
-from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
-from llama_stack.log import get_logger
-from llama_stack.models.llama.llama3.chat_format import ChatFormat
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
index f85bbb767..2bc99f14f 100644
--- a/src/llama_stack/core/routers/safety.py
+++ b/src/llama_stack/core/routers/safety.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
-
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.log import get_logger
+from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
index 984a8e2a7..eccc05732 100644
--- a/src/llama_stack/core/routers/tool_runtime.py
+++ b/src/llama_stack/core/routers/tool_runtime.py
@@ -6,14 +6,13 @@
 
 from typing import Any
 
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
     ToolRuntime,
 )
 
-from llama_stack.log import get_logger
-
 from ..routing_tables.toolgroups import ToolGroupsRoutingTable
 
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
index bfd090e32..02e56ed7e 100644
--- a/src/llama_stack/core/routers/vector_io.py
+++ b/src/llama_stack/core/routers/vector_io.py
@@ -9,6 +9,9 @@ import uuid
 from typing import Annotated, Any
 
 from fastapi import Body
+
+from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Chunk,
     HealthResponse,
@@ -36,9 +39,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import VectorStoresConfig
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core::routers")
diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
index 66830bc41..9037ffe8b 100644
--- a/src/llama_stack/core/routing_tables/benchmarks.py
+++ b/src/llama_stack/core/routing_tables/benchmarks.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
-
 from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
index cfbafc9a8..a9e3ff95f 100644
--- a/src/llama_stack/core/routing_tables/common.py
+++ b/src/llama_stack/core/routing_tables/common.py
@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
-
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.access_control.datatypes import Action
 from llama_stack.core.datatypes import (
@@ -20,6 +18,7 @@ from llama_stack.core.datatypes import (
 from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.core.store import DistributionRegistry
 from llama_stack.log import get_logger
+from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
index c49c9769b..62fd07b13 100644
--- a/src/llama_stack/core/routing_tables/datasets.py
+++ b/src/llama_stack/core/routing_tables/datasets.py
@@ -7,6 +7,10 @@
 import uuid
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    DatasetWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Dataset,
     DatasetNotFoundError,
@@ -20,11 +24,6 @@ from llama_stack_api import (
     URIDataSource,
 )
 
-from llama_stack.core.datatypes import (
-    DatasetWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
index e1210a139..1facbb27b 100644
--- a/src/llama_stack/core/routing_tables/models.py
+++ b/src/llama_stack/core/routing_tables/models.py
@@ -7,6 +7,13 @@
 import time
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    ModelWithOwner,
+    RegistryEntrySource,
+)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListModelsResponse,
     Model,
@@ -17,14 +24,6 @@ from llama_stack_api import (
     OpenAIModel,
 )
 
-from llama_stack.core.datatypes import (
-    ModelWithOwner,
-    RegistryEntrySource,
-)
-from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
index 66165ac2f..65ed26b85 100644
--- a/src/llama_stack/core/routing_tables/scoring_functions.py
+++ b/src/llama_stack/core/routing_tables/scoring_functions.py
@@ -4,6 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import (
+    ScoringFnWithOwner,
+)
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ListScoringFunctionsResponse,
     ParamType,
@@ -13,11 +17,6 @@ from llama_stack_api import (
     ScoringFunctions,
 )
 
-from llama_stack.core.datatypes import (
-    ScoringFnWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
index 0f981c49d..97b2efb96 100644
--- a/src/llama_stack/core/routing_tables/shields.py
+++ b/src/llama_stack/core/routing_tables/shields.py
@@ -6,12 +6,11 @@
 
 from typing import Any
 
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
-
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
 
 from .common import CommonRoutingTableImpl
diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
index a552cb96e..7e2068608 100644
--- a/src/llama_stack/core/routing_tables/toolgroups.py
+++ b/src/llama_stack/core/routing_tables/toolgroups.py
@@ -6,6 +6,8 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
+from llama_stack.log import get_logger
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -16,9 +18,6 @@ from llama_stack_api import (
     ToolGroups,
 )
 
-from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
index f95463b3c..93c119542 100644
--- a/src/llama_stack/core/routing_tables/vector_stores.py
+++ b/src/llama_stack/core/routing_tables/vector_stores.py
@@ -6,6 +6,11 @@
 
 from typing import Any
 
+from llama_stack.core.datatypes import (
+    VectorStoreWithOwner,
+)
+from llama_stack.log import get_logger
+
 # Removed VectorStores import to avoid exposing public API
 from llama_stack_api import (
     ModelNotFoundError,
@@ -23,11 +28,6 @@ from llama_stack_api import (
     VectorStoreSearchResponsePage,
 )
 
-from llama_stack.core.datatypes import (
-    VectorStoreWithOwner,
-)
-from llama_stack.log import get_logger
-
 from .common import CommonRoutingTableImpl, lookup_model
 
 logger = get_logger(name=__name__, category="core::routing_tables")
diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
index a7f5d7916..66942dd39 100644
--- a/src/llama_stack/core/server/auth_providers.py
+++ b/src/llama_stack/core/server/auth_providers.py
@@ -11,7 +11,6 @@ from urllib.parse import parse_qs, urljoin, urlparse
 
 import httpx
 import jwt
-from llama_stack_api import TokenValidationError
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -23,6 +22,7 @@ from llama_stack.core.datatypes import (
     User,
 )
 from llama_stack.log import get_logger
+from llama_stack_api import TokenValidationError
 
 logger = get_logger(name=__name__, category="core::auth")
diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
index e7a84937d..af5002565 100644
--- a/src/llama_stack/core/server/routes.py
+++ b/src/llama_stack/core/server/routes.py
@@ -10,10 +10,10 @@ from collections.abc import Callable
 from typing import Any
 
 from aiohttp import hdrs
-from llama_stack_api import Api, ExternalApiSpec, WebMethod
 from starlette.routing import Route
 
 from llama_stack.core.resolver import api_protocol_map
+from llama_stack_api import Api, ExternalApiSpec, WebMethod
 
 EndpointFunc = Callable[..., Any]
 PathParams = dict[str, str]
diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
index 8116348ec..0d3513980 100644
--- a/src/llama_stack/core/server/server.py
+++ b/src/llama_stack/core/server/server.py
@@ -28,7 +28,6 @@ from fastapi import Path as FastapiPath
 from fastapi.exceptions import RequestValidationError
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, StreamingResponse
-from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 from openai import BadRequestError
 from pydantic import BaseModel, ValidationError
 
@@ -57,6 +56,7 @@ from llama_stack.core.utils.config import redact_sensitive_fields
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger, setup_logging
+from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
 from .auth import AuthenticationMiddleware
 from .quota import QuotaMiddleware
diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
index 674c35f31..00d990cb1 100644
--- a/src/llama_stack/core/stack.py
+++ b/src/llama_stack/core/stack.py
@@ -12,6 +12,28 @@ import tempfile
 from typing import Any
 
 import yaml
+
+from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
+from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
+from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
+from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
+from llama_stack.core.resolver import ProviderRegistry, resolve_impls
+from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageBackendConfig,
+    StorageConfig,
+)
+from llama_stack.core.store.registry import create_dist_registry
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.log import get_logger
 from llama_stack_api import (
     Agents,
     Api,
@@ -37,28 +59,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
-from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig
-from llama_stack.core.distribution import get_provider_registry
-from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl
-from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceImpl
-from llama_stack.core.providers import ProviderImpl, ProviderImplConfig
-from llama_stack.core.resolver import ProviderRegistry, resolve_impls
-from llama_stack.core.routing_tables.common import CommonRoutingTableImpl
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    ServerStoresConfig,
-    SqliteKVStoreConfig,
-    SqliteSqlStoreConfig,
-    SqlStoreReference,
-    StorageBackendConfig,
-    StorageConfig,
-)
-from llama_stack.core.store.registry import create_dist_registry
-from llama_stack.core.utils.dynamic import instantiate_class_type
-from llama_stack.log import get_logger
-
 logger = get_logger(name=__name__, category="core")
diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
index 1a56277ea..5268fa641 100644
--- a/src/llama_stack/core/telemetry/telemetry.py
+++ b/src/llama_stack/core/telemetry/telemetry.py
@@ -16,7 +16,6 @@ from typing import (
     cast,
 )
 
-from llama_stack_api import json_schema_type, register_schema
 from opentelemetry import metrics, trace
 from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
@@ -29,6 +28,7 @@ from pydantic import BaseModel, Field
 
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import Primitive
+from llama_stack_api import json_schema_type, register_schema
 
 ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
index fd76e3ccb..52a07b7f1 100644
--- a/src/llama_stack/distributions/dell/dell.py
+++ b/src/llama_stack/distributions/dell/dell.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -18,6 +16,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.vector_io.chroma import ChromaVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
index 67af0e92a..a515794d5 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
+++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
@@ -6,8 +6,6 @@
 
 from pathlib import Path
 
-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     ModelInput,
@@ -23,6 +21,7 @@ from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack_api import ModelType
 
 
 def get_distribution_template() -> DistributionTemplate:
diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
index 59deca6d0..1f4dbf2c2 100644
--- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py
+++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
@@ -5,8 +5,6 @@
 # the root directory of this source tree.
 
-from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
-
 from llama_stack.core.datatypes import (
     BenchmarkInput,
     BuildProvider,
@@ -34,6 +32,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
     PGVectorVectorIOConfig,
 )
 from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
+from llama_stack_api import DatasetPurpose, ModelType, URIDataSource
 
 
 def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderModelEntry]]]:
diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
index 1a8126290..4c21a8c99 100644
--- a/src/llama_stack/distributions/starter/starter.py
+++ b/src/llama_stack/distributions/starter/starter.py
@@ -7,8 +7,6 @@
 
 from typing import Any
 
-from llama_stack_api import RemoteProviderSpec
-
 from llama_stack.core.datatypes import (
     BuildProvider,
     Provider,
@@ -39,6 +37,7 @@ from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOC
 from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
 from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
+from llama_stack_api import RemoteProviderSpec
 
 
 def _get_config_for_provider(provider_spec: ProviderSpec) -> dict[str, Any]:
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index faf5fb085..5755a26de 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -10,7 +10,6 @@ from typing import Any, Literal
 import jinja2
 import rich
 import yaml
-from llama_stack_api import DatasetPurpose, ModelType
 from pydantic import BaseModel, Field
 
 from llama_stack.core.datatypes import (
@@ -43,6 +42,7 @@ from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import get_pip_packages as get_sql_pip_packages
+from llama_stack_api import DatasetPurpose, ModelType
 
 
 def filter_empty_values(obj: Any) -> Any:
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
index 025fcc676..347f6fdb1 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
@@ -5,6 +5,10 @@
 # the root directory of this source tree.
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
+from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
     Conversations,
@@ -25,11 +29,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl
-from llama_stack.providers.utils.responses.responses_store import ResponsesStore
-
 from .config import MetaReferenceAgentsImplConfig
 from .responses.openai_responses import OpenAIResponsesImpl
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
index 347eeef78..3f88b1562 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
@@ -8,6 +8,13 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
+from pydantic import BaseModel, TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.responses.responses_store import (
+    ResponsesStore,
+    _OpenAIResponseObjectWithInputAndMessages,
+)
 from llama_stack_api import (
     ConversationItem,
     Conversations,
@@ -34,13 +41,6 @@ from llama_stack_api import (
     ToolRuntime,
     VectorIO,
 )
-from pydantic import BaseModel, TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.responses.responses_store import (
-    ResponsesStore,
-    _OpenAIResponseObjectWithInputAndMessages,
-)
 
 from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
index 6a791e92d..ea4486b62 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
@@ -8,6 +8,9 @@ import uuid
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     AllowedToolsFilter,
     ApprovalFilter,
@@ -65,10 +68,6 @@ from llama_stack_api import (
     WebSearchToolTypes,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-
 from .types import ChatCompletionContext, ChatCompletionResult
 from .utils import (
     convert_chat_choice_to_response_message,
@@ -1022,11 +1021,11 @@ class StreamingResponseOrchestrator:
         self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput]
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process all tools and emit appropriate streaming events."""
-        from llama_stack_api import ToolDef
         from openai.types.chat import ChatCompletionToolParam
 
         from llama_stack.models.llama.datatypes import ToolDefinition
         from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool
+        from llama_stack_api import ToolDef
 
         def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
             tool_def = ToolDefinition(
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
index 38fb2a94f..616ec2477 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
@@ -9,6 +9,8 @@ import json
 from collections.abc import AsyncIterator
 from typing import Any
 
+from llama_stack.core.telemetry import tracing
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ImageContentItem,
     OpenAIChatCompletionContentPartImageParam,
@@ -37,9 +39,6 @@ from llama_stack_api import (
     VectorIO,
 )
 
-from llama_stack.core.telemetry import tracing
-from llama_stack.log import get_logger
-
 from .types import ChatCompletionContext, ToolExecutionResult
 
 logger = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
index 35ad03378..f6efcee22 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
@@ -7,6 +7,9 @@
 from dataclasses import dataclass
 from typing import cast
 
+from openai.types.chat import ChatCompletionToolParam
+from pydantic import BaseModel
+
 from llama_stack_api import (
     OpenAIChatCompletionToolCall,
     OpenAIMessageParam,
@@ -26,8 +29,6 @@ from llama_stack_api import (
     OpenAIResponseTool,
     OpenAIResponseToolMCP,
 )
-from openai.types.chat import ChatCompletionToolParam
-from pydantic import BaseModel
 
 
 class ToolExecutionResult(BaseModel):
diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
index dd90ac298..bfb557a99 100644
--- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py
+++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
@@ -6,10 +6,9 @@
 
 import asyncio
 
-from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
-
 from llama_stack.core.telemetry import tracing
 from llama_stack.log import get_logger
+from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel
 
 log = get_logger(name=__name__, category="agents::meta_reference")
diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
index 27d0f4213..11c4b06a9 100644
--- a/src/llama_stack/providers/inline/batches/reference/__init__.py
+++ b/src/llama_stack/providers/inline/batches/reference/__init__.py
@@ -6,10 +6,9 @@
 
 from typing import Any
 
-from llama_stack_api import Files, Inference, Models
-
 from llama_stack.core.datatypes import AccessRule, Api
 from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack_api import Files, Inference, Models
 
 from .batches import ReferenceBatchesImpl
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
index f0f8da96c..73727799d 100644
--- a/src/llama_stack/providers/inline/batches/reference/batches.py
+++ b/src/llama_stack/providers/inline/batches/reference/batches.py
@@ -13,6 +13,11 @@ import uuid
 from io import BytesIO
 from typing import Any, Literal
 
+from openai.types.batch import BatchError, Errors
+from pydantic import BaseModel
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import KVStore
 from llama_stack_api import (
     Batches,
     BatchObject,
@@ -33,11 +38,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     ResourceNotFoundError,
 )
-from openai.types.batch import BatchError, Errors
-from pydantic import BaseModel
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import KVStore
 
 from .config import ReferenceBatchesImplConfig
diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
index 1fcfbbef4..6ab1a540f 100644
--- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
+++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
@@ -5,11 +5,10 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import LocalFSDatasetIOConfig
diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
index e6020e8a3..d43e569e2 100644
--- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py
+++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
@@ -6,6 +6,10 @@
 import json
 from typing import Any
 
+from tqdm import tqdm
+
+from llama_stack.providers.utils.common.data_schema_validator import ColumnName
+from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -24,10 +28,6 @@ from llama_stack_api import (
     OpenAIUserMessageParam,
     Scoring,
 )
-from tqdm import tqdm
-
-from llama_stack.providers.utils.common.data_schema_validator import ColumnName
-from llama_stack.providers.utils.kvstore import kvstore_impl
 
 from .config import MetaReferenceEvalConfig
diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
index 5e8c887f1..5fb35a378 100644
--- a/src/llama_stack/providers/inline/files/localfs/files.py
+++ b/src/llama_stack/providers/inline/files/localfs/files.py
@@ -10,6 +10,14 @@ from pathlib import Path
 from typing import Annotated
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -21,14 +29,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import LocalfsFilesImplConfig
 
 logger = get_logger(name=__name__, category="files")
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
index 802e79f15..ec6e8bfe8 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/config.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import QuantizationConfig
 from pydantic import BaseModel, field_validator
 
 from llama_stack.providers.utils.inference import supported_inference_models
+from llama_stack_api import QuantizationConfig
 
 
 class MetaReferenceInferenceConfig(BaseModel):
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
index 2155a1ae8..6781d0af9 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
@@ -8,6 +8,14 @@ import math
 from typing import Optional
 
 import torch
+from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
+
+from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
+from llama_stack.models.llama.llama3.generation import Llama3
+from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
+from llama_stack.models.llama.llama4.generation import Llama4
+from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
+from llama_stack.models.llama.sku_types import Model, ModelFamily
 from llama_stack_api import (
     GreedySamplingStrategy,
     JsonSchemaResponseFormat,
@@ -18,14 +26,6 @@ from llama_stack_api import (
     SamplingParams,
     TopPSamplingStrategy,
 )
-from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData
-
-from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat
-from llama_stack.models.llama.llama3.generation import Llama3
-from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer
-from llama_stack.models.llama.llama4.generation import Llama4
-from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer
-from llama_stack.models.llama.sku_types import Model, ModelFamily
 
 from .common import model_checkpoint_dir
 from .config import MetaReferenceInferenceConfig
diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
index 753185fe7..42d1299ab 100644
--- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -9,23 +9,6 @@ import time
 import uuid
 from collections.abc import AsyncIterator
 
-from llama_stack_api import (
-    InferenceProvider,
-    Model,
-    ModelsProtocolPrivate,
-    ModelType,
-    OpenAIAssistantMessageParam,
-    OpenAIChatCompletion,
-    OpenAIChatCompletionChunk,
-    OpenAIChatCompletionRequestWithExtraBody,
-    OpenAIChatCompletionUsage,
-    OpenAIChoice,
-    OpenAICompletion,
-    OpenAICompletionRequestWithExtraBody,
-    OpenAIUserMessageParam,
-    ToolChoice,
-)
-
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition
 from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat
@@ -48,6 +31,22 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
     build_hf_repo_model_entry,
 )
+from llama_stack_api import (
+    InferenceProvider,
+    Model,
+    ModelsProtocolPrivate,
+    ModelType,
+    OpenAIAssistantMessageParam,
+    OpenAIChatCompletion,
+    OpenAIChatCompletionChunk,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIChatCompletionUsage,
+    OpenAIChoice,
+    OpenAICompletion,
+    OpenAICompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
+    ToolChoice,
+)
 
 from .config import MetaReferenceInferenceConfig
 from .generators import LlamaGenerator
@@ -441,6 +440,8 @@ class MetaReferenceInferenceImpl(
         params: OpenAIChatCompletionRequestWithExtraBody,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream chat completion chunks as they're generated."""
+        from llama_stack.models.llama.datatypes import StopReason
+        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
         from llama_stack_api import (
             OpenAIChatCompletionChunk,
             OpenAIChatCompletionToolCall,
@@ -449,9 +450,6 @@ class MetaReferenceInferenceImpl(
             OpenAIChunkChoice,
         )
 
-        from llama_stack.models.llama.datatypes import StopReason
-        from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message
-
         response_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"
         created = int(time.time())
         generated_text = ""
diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
index 14c9a41a4..b5cadeec2 100644
--- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
+++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
@@ -6,6 +6,10 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.embedding_mixin import (
+    SentenceTransformerEmbeddingMixin,
+)
 from llama_stack_api import (
     InferenceProvider,
     Model,
@@ -18,11 +22,6 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.embedding_mixin import (
-    SentenceTransformerEmbeddingMixin,
-)
-
 from .config import SentenceTransformersInferenceConfig
 
 log = get_logger(name=__name__, category="inference")
diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
index 7a85d0e03..cc018c865 100644
--- a/src/llama_stack/providers/inline/post_training/common/validator.py
+++ b/src/llama_stack/providers/inline/post_training/common/validator.py
@@ -12,11 +12,10 @@
 
 from typing import Any
 
-from llama_stack_api import ChatCompletionInputType, DialogType, StringType
-
 from llama_stack.providers.utils.common.data_schema_validator import (
     ColumnName,
 )
+from llama_stack_api import ChatCompletionInputType, DialogType, StringType
 
 EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = {
     "instruct": [
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
index f3f3d8d56..fa939d439 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.huggingface.config import (
+    HuggingFacePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -20,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.huggingface.config import (
-    HuggingFacePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 58a30618c..c7c737fbd 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -12,14 +12,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    TrainingConfig,
-)
 from peft import LoraConfig
 from transformers import (
     AutoTokenizer,
@@ -28,6 +20,14 @@ from trl import SFTConfig, SFTTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
index f7dc3ebf2..da2626555 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
@@ -11,13 +11,6 @@ from typing import Any
 
 import torch
 from datasets import Dataset
-from llama_stack_api import (
-    Checkpoint,
-    DatasetIO,
-    Datasets,
-    DPOAlignmentConfig,
-    TrainingConfig,
-)
 from transformers import (
     AutoTokenizer,
 )
@@ -25,6 +18,13 @@ from trl import DPOConfig, DPOTrainer
 
 from llama_stack.log import get_logger
 from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
+from llama_stack_api import (
+    Checkpoint,
+    DatasetIO,
+    Datasets,
+    DPOAlignmentConfig,
+    TrainingConfig,
+)
 
 from ..config import HuggingFacePostTrainingConfig
 from ..utils import (
diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
index 86c3c3f52..2037f70e7 100644
--- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py
+++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
@@ -14,9 +14,10 @@ from typing import TYPE_CHECKING, Any, Protocol
 import psutil
 import torch
 from datasets import Dataset
-from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
 from transformers import AutoConfig, AutoModelForCausalLM
 
+from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
index 1483b8385..f929ea4dd 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
@@ -13,7 +13,6 @@ from collections.abc import Callable
 
 import torch
-from llama_stack_api import DatasetFormat
 from pydantic import BaseModel
 from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages
 from torchtune.models.llama3 import llama3_tokenizer
@@ -24,6 +23,7 @@ from torchtune.modules.transforms import Transform
 
 from llama_stack.models.llama.sku_list import resolve_model
 from llama_stack.models.llama.sku_types import Model
+from llama_stack_api import DatasetFormat
 
 BuildLoraModelCallable = Callable[..., torch.nn.Module]
 BuildTokenizerCallable = Callable[..., Llama3Tokenizer]
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
index 3370d42fa..515ff7b66 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
@@ -6,6 +6,11 @@
 from enum import Enum
 from typing import Any
 
+from llama_stack.providers.inline.post_training.torchtune.config import (
+    TorchtunePostTrainingConfig,
+)
+from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
+from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack_api import (
     AlgorithmConfig,
     Checkpoint,
@@ -21,12 +26,6 @@ from llama_stack_api import (
     TrainingConfig,
 )
 
-from llama_stack.providers.inline.post_training.torchtune.config import (
-    TorchtunePostTrainingConfig,
-)
-from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
-from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
-
 
 class TrainingArtifactType(Enum):
     CHECKPOINT = "checkpoint"
diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
index 2bf1d0fe7..f5e5db415 100644
--- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
+++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
@@ -12,17 +12,6 @@ from pathlib import Path
 from typing import Any
 
 import torch
-from llama_stack_api import (
-    Checkpoint,
-    DataConfig,
-    DatasetIO,
-    Datasets,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    PostTrainingMetric,
-    QATFinetuningConfig,
-    TrainingConfig,
-)
 from torch import nn
 from torch.optim import Optimizer
 from torch.utils.data import DataLoader, DistributedSampler
@@ -56,6 +45,17 @@ from llama_stack.providers.inline.post_training.torchtune.config import (
     TorchtunePostTrainingConfig,
 )
 from llama_stack.providers.inline.post_training.torchtune.datasets.sft import SFTDataset
+from llama_stack_api import (
+    Checkpoint,
+    DataConfig,
+    DatasetIO,
+    Datasets,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    PostTrainingMetric,
+    QATFinetuningConfig,
+    TrainingConfig,
+)
 
 log = get_logger(name=__name__, category="post_training")
diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
index 80e907c10..071fbe2dc 100644
--- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
+++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
@@ -10,6 +10,10 @@ from typing import TYPE_CHECKING, Any
 if TYPE_CHECKING:
     from codeshield.cs import CodeShieldScanResult
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ModerationObject,
     ModerationObjectResults,
@@ -21,11 +25,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import CodeScannerConfig
 
 log = get_logger(name=__name__, category="safety")
diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
index 36e4280b9..ff1536bea 100644
--- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
+++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
@@ -9,6 +9,13 @@ import uuid
 from string import Template
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.log import get_logger
+from llama_stack.models.llama.datatypes import Role
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     ImageContentItem,
     Inference,
@@ -26,14 +33,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import Role
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 from .config import LlamaGuardConfig
 
 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
index b4f495f19..51383da1b 100644
--- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
+++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
@@ -7,6 +7,11 @@
 from typing import Any
 
 import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+from llama_stack.core.utils.model_utils import model_local_dir
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,11 +23,6 @@ from llama_stack_api import (
     ShieldStore,
     ViolationLevel,
 )
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-from llama_stack.core.utils.model_utils import model_local_dir
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 
 from .config import PromptGuardConfig, PromptGuardType
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
index 326fd9211..cf5cb79ba 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -17,12 +22,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import BasicScoringConfig
 from .scoring_fn.docvqa_scoring_fn import DocVQAScoringFn
 from .scoring_fn.equality_scoring_fn import EqualityScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
index 93c2627dd..e48bab8fa 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
@@ -8,9 +8,8 @@ import json
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.docvqa import docvqa
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
index 382c64d88..2e79240be 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.equality import equality
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 4ec85bb09..33b1c5a31 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.ifeval import (
     ifeval,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index 4e9d49e96..1f4f2f979 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
 from .fn_defs.regex_parser_math_response import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index 7f213b38c..1cc74f874 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
 
 from .fn_defs.regex_parser_multiple_choice_answer import (
     regex_parser_multiple_choice_answer,
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index b291924d5..fe15a4972 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,9 +6,8 @@
 
 from typing import Any
 
-from llama_stack_api import ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import ScoringFnParams, ScoringResultRow
 
 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index cbab93c74..cfa35547b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,6 +17,16 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
+from pydantic import BaseModel
+
+from llama_stack.core.datatypes import Api
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+    validate_row_schema,
+)
+from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -29,16 +39,6 @@ from llama_stack_api import (
     ScoringResult,
     ScoringResultRow,
 )
-from pydantic import BaseModel
-
-from llama_stack.core.datatypes import Api
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-    validate_row_schema,
-)
-from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics
 
 from .config import BraintrustScoringConfig
 from .scoring_fn.fn_defs.answer_correctness import answer_correctness_fn_def
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index aa636d2b3..23e6ad705 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,6 +5,11 @@
 # the root directory of this source tree.
 from typing import Any
 
+from llama_stack.core.datatypes import Api
+from llama_stack.providers.utils.common.data_schema_validator import (
+    get_valid_schemas,
+    validate_dataset_schema,
+)
 from llama_stack_api import (
     DatasetIO,
     Datasets,
@@ -18,12 +23,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.core.datatypes import Api
-from llama_stack.providers.utils.common.data_schema_validator import (
-    get_valid_schemas,
-    validate_dataset_schema,
-)
-
 from .config import LlmAsJudgeScoringConfig
 from .scoring_fn.llm_as_judge_scoring_fn import LlmAsJudgeScoringFn
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index 169a4d8b7..73ce82cda 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@
 import re
 from typing import Any
 
-from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
-
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
 
 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
 from .fn_defs.llm_as_judge_base import llm_as_judge_base
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index f499989cb..240df199b 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,6 +6,10 @@
 
 from jinja2 import Template
+
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    interleaved_content_as_str,
+)
 from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
     InterleavedContent,
@@ -16,10 +20,6 @@ from llama_stack_api import (
     RAGQueryGeneratorConfig,
 )
 
-from llama_stack.providers.utils.inference.prompt_adapter import (
-    interleaved_content_as_str,
-)
-
 
 async def generate_rag_query(
     config: RAGQueryGeneratorConfig,
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index aacb7bb38..895d219bb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,6 +12,11 @@ from typing import Any
 
 import httpx
 from fastapi import UploadFile
+from pydantic import TypeAdapter
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
 from llama_stack_api import (
     URL,
     Files,
@@ -34,11 +39,6 @@ from llama_stack_api import (
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
-from pydantic import TypeAdapter
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import parse_data_url
 
 from .config import RagToolRuntimeConfig
 from .context_retriever import generate_rag_query
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index d955b1d06..3897991f5 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd433f818..d516d9fe9 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index abef42499..d52a54e6a 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,6 +12,13 @@ from typing import Any
 
 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from numpy.typing import NDArray
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.api import KVStore
+from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
+from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 from llama_stack_api import (
     Chunk,
     Files,
@@ -25,13 +32,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
     VectorStoresProtocolPrivate,
 )
-from numpy.typing import NDArray
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.kvstore import kvstore_impl
-from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
-from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex
 
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index 08d05c991..14ddd2362 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index 437d643f0..4251f2f39 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index e979ff323..74bc349a5 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,16 +12,6 @@ from typing import Any
 
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
-from llama_stack_api import (
-    Chunk,
-    Files,
-    Inference,
-    QueryChunksResponse,
-    VectorIO,
-    VectorStore,
-    VectorStoreNotFoundError,
-    VectorStoresProtocolPrivate,
-)
 from numpy.typing import NDArray
 
 from llama_stack.log import get_logger
@@ -35,6 +25,16 @@ from llama_stack.providers.utils.memory.vector_store import (
     VectorStoreWithIndex,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 
 logger = get_logger(name=__name__, category="vector_io")
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index bd204cecd..455be1ae7 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,14 +5,13 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.utils.kvstore import kvstore_dependencies
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
 
-from llama_stack.providers.utils.kvstore import kvstore_dependencies
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index dfc527816..024254b57 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
-
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 3f0a83a30..d34312353 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,6 +5,7 @@
 # the root directory of this source tree.
 
 
+from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
 from llama_stack_api import (
     Api,
     InlineProviderSpec,
@@ -12,8 +13,6 @@ from llama_stack_api import (
     RemoteProviderSpec,
 )
 
-from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
-
 
 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index 1260ce644..72069f716 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,9 @@
 from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
-
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
 
 from .config import HuggingfaceDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index cb674b0d7..2f5548fa9 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,6 +7,7 @@
 from typing import Any
 
 import aiohttp
+
 from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
 
 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index fbdec0d4d..5802cb098 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,6 +6,8 @@
 from typing import Any
 
 import requests
+
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     Agents,
     Benchmark,
@@ -22,8 +24,6 @@ from llama_stack_api import (
     ScoringResult,
 )
 
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
-
 from .config import NVIDIAEvalConfig
 
 DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index bbd630977..d2f5a08eb 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,6 +8,12 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Depends, File, Form, Response, UploadFile
+
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -18,12 +24,6 @@ from llama_stack_api import (
     Order,
     ResourceNotFoundError,
 )
-
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from openai import OpenAI
 
 from .config import OpenAIFilesImplConfig
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 14f1e3852..68822eb77 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,6 +17,12 @@ from fastapi import Depends, File, Form, Response, UploadFile
 
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
 
+from llama_stack.core.datatypes import AccessRule
+from llama_stack.core.id_generation import generate_object_id
+from llama_stack.providers.utils.files.form_data import parse_expires_after
+from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
+from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
+from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
 from llama_stack_api import (
     ExpiresAfter,
     Files,
@@ -28,13 +34,6 @@ from llama_stack_api import (
     ResourceNotFoundError,
 )
 
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.id_generation import generate_object_id
-from llama_stack.providers.utils.files.form_data import parse_expires_after
-from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
-from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl
-
 from .config import S3FilesImplConfig
 
 # TODO: provider data for S3 credentials
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 7ee4c54e2..b706b90e1 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 596f6c234..b801b91b2 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class AzureProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1a9fe533b..70ee95916 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,6 +6,11 @@
 
 from collections.abc import AsyncIterator, Iterable
 
+from openai import AuthenticationError
+
+from llama_stack.core.telemetry.tracing import get_current_span
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
@@ -15,11 +20,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from openai import AuthenticationError
-
-from llama_stack.core.telemetry.tracing import get_current_span
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import BedrockConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index c7f3111f9..680431e22 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,13 +6,12 @@
 
 from urllib.parse import urljoin
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index a1fd41e2d..db357fd1c 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 4974593d2..bd409fa13 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 8b802379f..c07d97b67 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,10 +7,10 @@
 from collections.abc import Iterable
 
 from databricks.sdk import WorkspaceClient
-from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 
 from .config import DatabricksImplConfig
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index d786655eb..e36c76054 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
index 6c25c005c..46cec7d0d 100644
--- a/src/llama_stack/providers/remote/inference/gemini/config.py
+++ b/src/llama_stack/providers/remote/inference/gemini/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class GeminiProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index 79d694f06..f6f48cc2b 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,6 +6,7 @@
 
 from typing import Any
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
@@ -13,8 +14,6 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import GeminiConfig
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index cec327716..cca53a4e8 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class GroqProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index c16311830..ded210d89 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class LlamaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 1dea3e3cb..a5f67ecd1 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,6 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
@@ -11,10 +14,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 logger = get_logger(name=__name__, category="inference::llama_openai_compat")
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index 6ff98d290..e5b0c6b73 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class NVIDIAProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 9e4c6f559..17f8775bf 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,6 +8,9 @@
 from collections.abc import Iterable
 
 import aiohttp
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     Model,
     ModelType,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     RerankResponse,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from . import NVIDIAConfig
 from .utils import _is_nvidia_hosted
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
index 24b4ad926..93cc36d76 100644
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class OCIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 36e56cf6c..239443963 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,11 +10,6 @@ from typing import Any
 
 import httpx
 import oci
-from llama_stack_api import (
-    ModelType,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
@@ -23,6 +18,11 @@ from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 
 logger = get_logger(name=__name__, category="inference::oci")
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 6a471429e..d1bf85361 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,17 +7,17 @@
 
 import asyncio
 
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
     Model,
     UnsupportedModelError,
 )
-from ollama import AsyncClient as AsyncOllamaClient
-
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 logger = get_logger(name=__name__, category="inference::ollama")
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index cbb01b2d0..ab28e571f 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class OpenAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index 7045dbf2e..54508b6fb 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 19cf0c5d7..75eedf026 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,6 +6,9 @@
 
 from collections.abc import AsyncIterator
 
+from openai import AsyncOpenAI
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     Inference,
     Model,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from openai import AsyncOpenAI
-
-from llama_stack.core.request_headers import NeedsRequestProviderData
 
 from .config import PassthroughImplConfig
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index aaa4230a8..2ee56ca94 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class RunpodProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index 4596b2df5..9c770cc24 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,14 +6,13 @@
 
 from collections.abc import AsyncIterator
 
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
 
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
 from .config import RunpodImplConfig
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index 6d72e7205..93679ba99 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 051a2afa3..74edc8523 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index 831a26e39..dd47ccc62 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,14 +8,14 @@
 from collections.abc import Iterable
 
 from huggingface_hub import AsyncInferenceClient, HfApi
-from llama_stack_api import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
 from pydantic import SecretStr
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 
 from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig
diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
index 96c0538e3..c1b3c4a55 100644
--- a/src/llama_stack/providers/remote/inference/together/config.py
+++ b/src/llama_stack/providers/remote/inference/together/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index f1355a760..cd34aec5e 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -8,18 +8,18 @@
 from collections.abc import Iterable
 from typing import Any, cast
 
-from llama_stack_api import (
-    Model,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
 
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 
 from .config import TogetherImplConfig
diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
index 53e2b3e65..5891f7cd0 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/config.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/config.py
@@ -6,10 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class VertexAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
index 23f713961..c43533ee4 100644
--- a/src/llama_stack/providers/remote/inference/vllm/config.py
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py
@@ -6,10 +6,10 @@
 
 from pathlib import Path
 
-from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index f7938c22c..1510e9384 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,6 +7,10 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 
 import httpx
+from pydantic import ConfigDict
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     HealthResponse,
     HealthStatus,
@@ -15,10 +19,6 @@ from llama_stack_api import (
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
-from pydantic import ConfigDict
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import VLLMInferenceAdapterConfig
diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
index 1bba040ef..914f80820 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/config.py
@@ -7,10 +7,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack_api import json_schema_type
 
 
 class WatsonXProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index de23c25d7..aab9e2dca 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -9,6 +9,12 @@ from typing import Any
 
 import litellm
 import requests
+
+from llama_stack.core.telemetry.tracing import get_current_span
+from llama_stack.log import get_logger
+from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
+from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
 from llama_stack_api import (
     Model,
     ModelType,
@@ -22,12 +28,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
 )
 
-from llama_stack.core.telemetry.tracing import get_current_span
-from llama_stack.log import get_logger
-from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
-from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
-from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params
-
 logger = get_logger(name=__name__, category="providers::remote::watsonx")
@@ -238,9 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
 
         # Convert response to OpenAI format
-        from llama_stack_api import OpenAIEmbeddingUsage
-
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
+        from llama_stack_api import OpenAIEmbeddingUsage
 
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
index 02c35241b..830a9f747 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
@@ -8,6 +8,11 @@ from datetime import datetime
 from typing import Any, Literal
 
 import aiohttp
+from pydantic import BaseModel, ConfigDict
+
+from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
+from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     AlgorithmConfig,
     DPOAlignmentConfig,
@@ -17,11 +22,6 @@ from llama_stack_api import (
     PostTrainingJobStatusResponse,
     TrainingConfig,
 )
-from pydantic import BaseModel, ConfigDict
-
-from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig
-from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
-from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .models import _MODEL_ENTRIES
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
index 78762155d..bd40dacb4 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py
+++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
@@ -7,11 +7,11 @@
 import warnings
 from typing import Any
 
-from llama_stack_api import TrainingConfig
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig
+from llama_stack_api import TrainingConfig
 
 from .config import NvidiaPostTrainingConfig
diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
index 86b93c32e..c321f759b 100644
--- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
@@ -7,6 +7,8 @@
 import json
 from typing import Any
 
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.bedrock.client import create_bedrock_client
 from llama_stack_api import (
     OpenAIMessageParam,
     RunShieldResponse,
@@ -17,9 +19,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.bedrock.client import create_bedrock_client
-
 from .config import BedrockSafetyConfig
 
 logger = get_logger(name=__name__, category="safety::bedrock")
diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py
index ca28924d4..0b1f2581a 100644
--- a/src/llama_stack/providers/remote/safety/bedrock/config.py
+++ b/src/llama_stack/providers/remote/safety/bedrock/config.py
@@ -5,9 +5,8 @@
 # the root directory of this source tree.
 
-from llama_stack_api import json_schema_type
-
 from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+from llama_stack_api import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py
index fc686ae73..f11de5feb 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/config.py
+++ b/src/llama_stack/providers/remote/safety/nvidia/config.py
@@ -6,9 +6,10 @@
 import os
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
+from llama_stack_api import json_schema_type
+
 
 @json_schema_type
 class NVIDIASafetyConfig(BaseModel):
diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
index b3b5090e0..43ff45cc9 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -7,6 +7,8 @@
 from typing import Any
 
 import requests
+
+from llama_stack.log import get_logger
 from llama_stack_api import (
     ModerationObject,
     OpenAIMessageParam,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.log import get_logger
-
 from .config import NVIDIASafetyConfig
 
 logger = get_logger(name=__name__, category="safety::nvidia")
diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py
index a8e745851..bfb42d88a 100644
--- a/src/llama_stack/providers/remote/safety/sambanova/config.py
+++ b/src/llama_stack/providers/remote/safety/sambanova/config.py
@@ -6,9 +6,10 @@
 
 from typing import Any
 
-from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
+from llama_stack_api import json_schema_type
+
 
 class SambaNovaProviderDataValidator(BaseModel):
    sambanova_api_key: str | None = Field(
diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
index 119ebb6ed..c11cb544d 100644
--- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
+++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
@@ -8,6 +8,9 @@ from typing import Any
 
 import litellm
 import requests
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
 from llama_stack_api import (
     OpenAIMessageParam,
     RunShieldResponse,
@@ -18,9 +21,6 @@ from llama_stack_api import (
     ViolationLevel,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-
 from .config import SambaNovaSafetyConfig
 
 logger = get_logger(name=__name__, category="safety::sambanova")
diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
index 84e47dd4f..a5a53a9eb 100644
--- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import BingSearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
index b7eee776a..4888730e4 100644
--- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
@@ -7,6 +7,9 @@
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.models.llama.datatypes import BuiltinTool
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -17,9 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.models.llama.datatypes import BuiltinTool
-
 from .config import BraveSearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
index efb1eb2df..544597a51 100644
--- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
+++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
@@ -7,6 +7,9 @@
 from typing import Any
 from urllib.parse import urlparse
 
+from llama_stack.core.request_headers import NeedsRequestProviderData
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
 from llama_stack_api import (
     URL,
     Api,
@@ -17,10 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools
-
 from .config import MCPProviderConfig
 
 logger = get_logger(__name__, category="tools")
diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
index d65d66e67..d86cf5d8e 100644
--- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
+++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import TavilySearchToolConfig
diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
index 9cc865092..f8d806a5c 100644
--- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
+++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
@@ -8,6 +8,8 @@ import json
 from typing import Any
 
 import httpx
+
+from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack_api import (
     URL,
     ListToolDefsResponse,
@@ -18,8 +20,6 @@ from llama_stack_api import (
     ToolRuntime,
 )
 
-from llama_stack.core.request_headers import NeedsRequestProviderData
-
 from .config import WolframAlphaToolConfig
diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
index
eca5d349b..645b40661 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,6 +9,14 @@ from typing import Any from urllib.parse import urlparse import chromadb +from numpy.typing import NDArray + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -19,14 +27,6 @@ from llama_stack_api import ( VectorStore, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index b1e4f9a4a..648d641ad 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 2e2c788c7..4b9d6a566 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index b856bf918..aefa20317 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,17 +8,6 @@ import asyncio import os from typing import Any -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker @@ -34,6 +23,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( 
+ Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import MilvusVectorIOConfig as RemoteMilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index aeb1c83bb..87d40a883 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 8aa0303b6..2901bad97 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,17 +8,6 @@ import heapq from typing import Any import psycopg2 -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values @@ -31,6 +20,17 @@ from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator, sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 8cc4cbb2b..e0a3fe207 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 53d6be2b6..20ab653d0 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,6 +9,15 @@ import hashlib import uuid from typing import Any +from numpy.typing import NDArray +from qdrant_client import AsyncQdrantClient, models +from qdrant_client.models import PointStruct + +from llama_stack.log import get_logger +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig +from llama_stack.providers.utils.kvstore import kvstore_impl +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin +from llama_stack.providers.utils.memory.vector_store import 
ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from llama_stack_api import ( Chunk, Files, @@ -22,15 +31,6 @@ from llama_stack_api import ( VectorStoreNotFoundError, VectorStoresProtocolPrivate, ) -from numpy.typing import NDArray -from qdrant_client import AsyncQdrantClient, models -from qdrant_client.models import PointStruct - -from llama_stack.log import get_logger -from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig -from llama_stack.providers.utils.kvstore import kvstore_impl -from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin -from llama_stack.providers.utils.memory.vector_store import ChunkForDeletion, EmbeddingIndex, VectorStoreWithIndex from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 19f9679fb..75d1b7c51 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference +from llama_stack_api import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index c72666f63..ba3e6b7ea 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,17 +8,6 @@ from typing import Any import weaviate import weaviate.classes as wvc -from llama_stack_api import ( - Chunk, - Files, - Inference, - InterleavedContent, - QueryChunksResponse, - VectorIO, - VectorStore, - VectorStoreNotFoundError, - VectorStoresProtocolPrivate, -) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion @@ -35,6 +24,17 @@ from llama_stack.providers.utils.memory.vector_store import ( VectorStoreWithIndex, ) from llama_stack.providers.utils.vector_io.vector_utils import sanitize_collection_name +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index 7ef245779..c9a3b0920 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,9 +7,8 @@ from enum import Enum from typing import Any -from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType - from llama_stack.core.datatypes import Api +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType class ColumnName(Enum): diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 21afbec2b..3fac14f38 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,9 +7,10 @@ import json from fastapi import Request -from llama_stack_api import ExpiresAfter from pydantic 
import BaseModel, ValidationError +from llama_stack_api import ExpiresAfter + async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 3c707dd01..49e3af7a1 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,6 +6,11 @@ import asyncio from typing import Any +from sqlalchemy.exc import IntegrityError + +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType +from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, @@ -13,11 +18,6 @@ from llama_stack_api import ( OpenAIMessageParam, Order, ) -from sqlalchemy.exc import IntegrityError - -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType -from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 4f468725b..c462d1aad 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,6 +9,13 @@ import struct from collections.abc import AsyncIterator import litellm + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry +from llama_stack.providers.utils.inference.openai_compat import ( + prepare_openai_completion_params, +) from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, @@ -22,13 +29,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, ) -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry -from llama_stack.providers.utils.inference.openai_compat import ( - prepare_openai_completion_params, -) - logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index e7ca5ab74..42b54497f 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,13 +6,13 @@ from typing import Any -from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr from llama_stack.log import get_logger from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c97e42274..32d41ffde 100644 --- 
a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,18 +20,6 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) -from llama_stack_api import ( - URL, - GreedySamplingStrategy, - ImageContentItem, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TextContentItem, - TopKSamplingStrategy, - TopPSamplingStrategy, - _URLOrData, -) from openai.types.chat import ( ChatCompletionMessageToolCall, ) @@ -44,6 +32,18 @@ from llama_stack.models.llama.datatypes import ( ToolCall, ToolDefinition, ) +from llama_stack_api import ( + URL, + GreedySamplingStrategy, + ImageContentItem, + JsonSchemaResponseFormat, + OpenAIResponseFormatParam, + SamplingParams, + TextContentItem, + TopKSamplingStrategy, + TopPSamplingStrategy, + _URLOrData, +) logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index c05873df5..559ac90ce 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,6 +10,14 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any +from openai import AsyncOpenAI +from pydantic import BaseModel, ConfigDict + +from llama_stack.core.request_headers import NeedsRequestProviderData +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig +from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params +from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content from llama_stack_api import ( Model, ModelType, @@ -24,14 +32,6 @@ from llama_stack_api import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from openai import AsyncOpenAI -from pydantic import BaseModel, ConfigDict - -from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.log import get_logger -from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params -from llama_stack.providers.utils.inference.prompt_adapter import localize_image_content logger = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index ea01a34e9..6272c9eed 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,24 +12,6 @@ import re from typing import Any import httpx -from llama_stack_api import ( - CompletionRequest, - ImageContentItem, - InterleavedContent, - InterleavedContentItem, - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIFile, - OpenAIMessageParam, - OpenAISystemMessageParam, - OpenAIToolMessageParam, - OpenAIUserMessageParam, - ResponseFormat, - ResponseFormatType, - TextContentItem, - ToolChoice, -) from PIL import Image as PIL_Image from llama_stack.log import get_logger @@ -48,6 +30,24 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat 
from llama_stack.models.llama.llama3.tokenizer import Tokenizer from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal +from llama_stack_api import ( + CompletionRequest, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIFile, + OpenAIMessageParam, + OpenAISystemMessageParam, + OpenAIToolMessageParam, + OpenAIUserMessageParam, + ResponseFormat, + ResponseFormatType, + TextContentItem, + ToolChoice, +) log = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py index 895268a4f..0f8fa0a95 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py @@ -4,9 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack_api import json_schema_type from pydantic import BaseModel, Field +from llama_stack_api import json_schema_type + @json_schema_type class SqliteControlPlaneConfig(BaseModel): diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 68d1c11e5..540ff5940 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,6 +13,16 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body +from pydantic import TypeAdapter + +from llama_stack.core.id_generation import generate_object_id +from llama_stack.log import get_logger +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.memory.vector_store import ( + ChunkForDeletion, + content_from_data_and_mime_type, + make_overlapped_chunks, +) from llama_stack_api import ( Chunk, Files, @@ -43,16 +53,6 @@ from llama_stack_api import ( VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from pydantic import TypeAdapter - -from llama_stack.core.id_generation import generate_object_id -from llama_stack.log import get_logger -from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.vector_store import ( - ChunkForDeletion, - content_from_data_and_mime_type, - make_overlapped_chunks, -) EMBEDDING_DIMENSION = 768 diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 37ac79039..b6a671ddb 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,6 +14,15 @@ from urllib.parse import unquote import httpx import numpy as np +from numpy.typing import NDArray +from pydantic import BaseModel + +from llama_stack.log import get_logger +from llama_stack.models.llama.llama3.tokenizer import Tokenizer +from llama_stack.providers.utils.inference.prompt_adapter import ( + interleaved_content_as_str, +) +from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id from llama_stack_api import ( URL, Api, @@ -25,15 +34,6 @@ from llama_stack_api import ( RAGDocument, VectorStore, ) -from numpy.typing import NDArray -from pydantic import BaseModel - -from 
llama_stack.log import get_logger -from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.utils.inference.prompt_adapter import ( - interleaved_content_as_str, -) -from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id log = get_logger(name=__name__, category="providers::utils") diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index c7dfed15a..f6e7c435d 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,6 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference +from llama_stack.log import get_logger from llama_stack_api import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, @@ -15,10 +18,6 @@ from llama_stack_api import ( Order, ) -from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference -from llama_stack.log import get_logger - from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore from ..sqlstore.sqlstore import sqlstore_impl diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index d16c75263..f372db8b5 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,9 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow - from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow class BaseScoringFn(ABC): diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index 033a00edc..708fc7095 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,9 +8,10 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol -from llama_stack_api import PaginatedResponse from pydantic import BaseModel +from llama_stack_api import PaginatedResponse + class ColumnType(Enum): INTEGER = "INTEGER" diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 263f5e69f..10009d396 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,7 +6,6 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast -from llama_stack_api import PaginatedResponse from sqlalchemy import ( JSON, Boolean, @@ -29,6 +28,7 @@ from sqlalchemy.sql.elements import ColumnElement from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger +from llama_stack_api import PaginatedResponse from .api import ColumnDefinition, ColumnType, SqlStore diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index 
82c85f46c..fad1bf0f0 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,6 +10,14 @@ from enum import Enum from typing import Any, cast import httpx +from mcp import ClientSession, McpError +from mcp import types as mcp_types +from mcp.client.sse import sse_client +from mcp.client.streamable_http import streamablehttp_client + +from llama_stack.core.datatypes import AuthenticationRequiredError +from llama_stack.log import get_logger +from llama_stack.providers.utils.tools.ttl_dict import TTLDict from llama_stack_api import ( ImageContentItem, InterleavedContentItem, @@ -19,14 +27,6 @@ from llama_stack_api import ( ToolInvocationResult, _URLOrData, ) -from mcp import ClientSession, McpError -from mcp import types as mcp_types -from mcp.client.sse import sse_client -from mcp.client.streamable_http import streamablehttp_client - -from llama_stack.core.datatypes import AuthenticationRequiredError -from llama_stack.log import get_logger -from llama_stack.providers.utils.tools.ttl_dict import TTLDict logger = get_logger(__name__, category="tools") diff --git a/src/llama-stack-api/README.md b/src/llama_stack_api/README.md similarity index 100% rename from src/llama-stack-api/README.md rename to src/llama_stack_api/README.md diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/__init__.py rename to src/llama_stack_api/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/agents.py b/src/llama_stack_api/agents.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/agents.py rename to src/llama_stack_api/agents.py diff --git a/src/llama-stack-api/llama_stack_api/batches.py b/src/llama_stack_api/batches.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/batches.py rename to src/llama_stack_api/batches.py diff --git a/src/llama-stack-api/llama_stack_api/benchmarks.py b/src/llama_stack_api/benchmarks.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/benchmarks.py rename to src/llama_stack_api/benchmarks.py diff --git a/src/llama-stack-api/llama_stack_api/common/__init__.py b/src/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/__init__.py rename to src/llama_stack_api/common/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/common/content_types.py b/src/llama_stack_api/common/content_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/content_types.py rename to src/llama_stack_api/common/content_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/errors.py b/src/llama_stack_api/common/errors.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/errors.py rename to src/llama_stack_api/common/errors.py diff --git a/src/llama-stack-api/llama_stack_api/common/job_types.py b/src/llama_stack_api/common/job_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/job_types.py rename to src/llama_stack_api/common/job_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/responses.py b/src/llama_stack_api/common/responses.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/responses.py rename to src/llama_stack_api/common/responses.py diff --git a/src/llama-stack-api/llama_stack_api/common/tracing.py b/src/llama_stack_api/common/tracing.py similarity index 
100% rename from src/llama-stack-api/llama_stack_api/common/tracing.py rename to src/llama_stack_api/common/tracing.py diff --git a/src/llama-stack-api/llama_stack_api/common/training_types.py b/src/llama_stack_api/common/training_types.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/training_types.py rename to src/llama_stack_api/common/training_types.py diff --git a/src/llama-stack-api/llama_stack_api/common/type_system.py b/src/llama_stack_api/common/type_system.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/common/type_system.py rename to src/llama_stack_api/common/type_system.py diff --git a/src/llama-stack-api/llama_stack_api/conversations.py b/src/llama_stack_api/conversations.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/conversations.py rename to src/llama_stack_api/conversations.py diff --git a/src/llama-stack-api/llama_stack_api/datasetio.py b/src/llama_stack_api/datasetio.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datasetio.py rename to src/llama_stack_api/datasetio.py diff --git a/src/llama-stack-api/llama_stack_api/datasets.py b/src/llama_stack_api/datasets.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datasets.py rename to src/llama_stack_api/datasets.py diff --git a/src/llama-stack-api/llama_stack_api/datatypes.py b/src/llama_stack_api/datatypes.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/datatypes.py rename to src/llama_stack_api/datatypes.py diff --git a/src/llama-stack-api/llama_stack_api/eval.py b/src/llama_stack_api/eval.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/eval.py rename to src/llama_stack_api/eval.py diff --git a/src/llama-stack-api/llama_stack_api/files.py b/src/llama_stack_api/files.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/files.py rename to src/llama_stack_api/files.py diff --git a/src/llama-stack-api/llama_stack_api/inference.py b/src/llama_stack_api/inference.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/inference.py rename to src/llama_stack_api/inference.py diff --git a/src/llama-stack-api/llama_stack_api/inspect.py b/src/llama_stack_api/inspect.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/inspect.py rename to src/llama_stack_api/inspect.py diff --git a/src/llama-stack-api/llama_stack_api/models.py b/src/llama_stack_api/models.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/models.py rename to src/llama_stack_api/models.py diff --git a/src/llama-stack-api/llama_stack_api/openai_responses.py b/src/llama_stack_api/openai_responses.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/openai_responses.py rename to src/llama_stack_api/openai_responses.py diff --git a/src/llama-stack-api/llama_stack_api/post_training.py b/src/llama_stack_api/post_training.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/post_training.py rename to src/llama_stack_api/post_training.py diff --git a/src/llama-stack-api/llama_stack_api/prompts.py b/src/llama_stack_api/prompts.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/prompts.py rename to src/llama_stack_api/prompts.py diff --git a/src/llama-stack-api/llama_stack_api/providers.py b/src/llama_stack_api/providers.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/providers.py rename to src/llama_stack_api/providers.py diff --git 
a/src/llama-stack-api/llama_stack_api/py.typed b/src/llama_stack_api/py.typed similarity index 100% rename from src/llama-stack-api/llama_stack_api/py.typed rename to src/llama_stack_api/py.typed diff --git a/src/llama-stack-api/pyproject.toml b/src/llama_stack_api/pyproject.toml similarity index 100% rename from src/llama-stack-api/pyproject.toml rename to src/llama_stack_api/pyproject.toml diff --git a/src/llama-stack-api/llama_stack_api/rag_tool.py b/src/llama_stack_api/rag_tool.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/rag_tool.py rename to src/llama_stack_api/rag_tool.py diff --git a/src/llama-stack-api/llama_stack_api/resource.py b/src/llama_stack_api/resource.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/resource.py rename to src/llama_stack_api/resource.py diff --git a/src/llama-stack-api/llama_stack_api/safety.py b/src/llama_stack_api/safety.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/safety.py rename to src/llama_stack_api/safety.py diff --git a/src/llama-stack-api/llama_stack_api/schema_utils.py b/src/llama_stack_api/schema_utils.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/schema_utils.py rename to src/llama_stack_api/schema_utils.py diff --git a/src/llama-stack-api/llama_stack_api/scoring.py b/src/llama_stack_api/scoring.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/scoring.py rename to src/llama_stack_api/scoring.py diff --git a/src/llama-stack-api/llama_stack_api/scoring_functions.py b/src/llama_stack_api/scoring_functions.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/scoring_functions.py rename to src/llama_stack_api/scoring_functions.py diff --git a/src/llama-stack-api/llama_stack_api/shields.py b/src/llama_stack_api/shields.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/shields.py rename to src/llama_stack_api/shields.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py b/src/llama_stack_api/strong_typing/__init__.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/__init__.py rename to src/llama_stack_api/strong_typing/__init__.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py b/src/llama_stack_api/strong_typing/auxiliary.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py rename to src/llama_stack_api/strong_typing/auxiliary.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py b/src/llama_stack_api/strong_typing/classdef.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/classdef.py rename to src/llama_stack_api/strong_typing/classdef.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/core.py b/src/llama_stack_api/strong_typing/core.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/core.py rename to src/llama_stack_api/strong_typing/core.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py b/src/llama_stack_api/strong_typing/deserializer.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py rename to src/llama_stack_api/strong_typing/deserializer.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py b/src/llama_stack_api/strong_typing/docstring.py similarity index 100% rename from 
src/llama-stack-api/llama_stack_api/strong_typing/docstring.py rename to src/llama_stack_api/strong_typing/docstring.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/exception.py b/src/llama_stack_api/strong_typing/exception.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/exception.py rename to src/llama_stack_api/strong_typing/exception.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py b/src/llama_stack_api/strong_typing/inspection.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/inspection.py rename to src/llama_stack_api/strong_typing/inspection.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py b/src/llama_stack_api/strong_typing/mapping.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/mapping.py rename to src/llama_stack_api/strong_typing/mapping.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/name.py b/src/llama_stack_api/strong_typing/name.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/name.py rename to src/llama_stack_api/strong_typing/name.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/py.typed rename to src/llama_stack_api/strong_typing/py.typed diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/schema.py b/src/llama_stack_api/strong_typing/schema.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/schema.py rename to src/llama_stack_api/strong_typing/schema.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py b/src/llama_stack_api/strong_typing/serialization.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/serialization.py rename to src/llama_stack_api/strong_typing/serialization.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py b/src/llama_stack_api/strong_typing/serializer.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/serializer.py rename to src/llama_stack_api/strong_typing/serializer.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/slots.py b/src/llama_stack_api/strong_typing/slots.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/slots.py rename to src/llama_stack_api/strong_typing/slots.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/topological.py b/src/llama_stack_api/strong_typing/topological.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/strong_typing/topological.py rename to src/llama_stack_api/strong_typing/topological.py diff --git a/src/llama-stack-api/llama_stack_api/tools.py b/src/llama_stack_api/tools.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/tools.py rename to src/llama_stack_api/tools.py diff --git a/src/llama-stack-api/llama_stack_api/vector_io.py b/src/llama_stack_api/vector_io.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_io.py rename to src/llama_stack_api/vector_io.py diff --git a/src/llama-stack-api/llama_stack_api/vector_stores.py b/src/llama_stack_api/vector_stores.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/vector_stores.py rename to src/llama_stack_api/vector_stores.py diff --git 
a/src/llama-stack-api/llama_stack_api/version.py b/src/llama_stack_api/version.py similarity index 100% rename from src/llama-stack-api/llama_stack_api/version.py rename to src/llama_stack_api/version.py diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index b9c0ac916..4dc5b7993 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,6 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest + from llama_stack_api import OpenAIFilePurpose diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 61878ac4c..1f19c88c5 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,9 +9,9 @@ from unittest.mock import patch import pytest import requests -from llama_stack_api import OpenAIFilePurpose from llama_stack.core.datatypes import User +from llama_stack_api import OpenAIFilePurpose purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index d007b57d6..e4a0a24b5 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,6 +15,9 @@ that enables routing based on provider_data alone. from unittest.mock import AsyncMock, patch import pytest + +from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack_api import ( Api, OpenAIAssistantMessageParam, @@ -23,9 +26,6 @@ from llama_stack_api import ( OpenAIChoice, ) -from llama_stack.core.library_client import LlamaStackAsLibraryClient -from llama_stack.core.telemetry.telemetry import MetricEvent - class OpenAIChatCompletionWithMetrics(OpenAIChatCompletion): metrics: list[MetricEvent] | None = None diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index ff6925b58..e6868019a 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,6 +9,8 @@ import time import uuid import pytest + +from llama_stack.log import get_logger from llama_stack_api import ( DataConfig, DatasetFormat, @@ -18,8 +20,6 @@ from llama_stack_api import ( TrainingConfig, ) -from llama_stack.log import get_logger - # Configure logging logger = get_logger(name=__name__, category="post_training") diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py index 99b4982f0..a554752cd 100644 --- a/tests/integration/safety/test_llama_guard.py +++ b/tests/integration/safety/test_llama_guard.py @@ -12,9 +12,9 @@ import warnings from collections.abc import Generator import pytest -from llama_stack_api import ViolationLevel from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack_api import ViolationLevel # Llama Guard models available for text and vision shields LLAMA_GUARD_TEXT_MODELS = [CoreModelId.llama_guard_4_12b.value] diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py index 6a926f1d5..857ff2f81 100644 --- a/tests/integration/safety/test_safety.py +++ b/tests/integration/safety/test_safety.py @@ -7,6 +7,7 @@ import base64 import mimetypes import pytest + from llama_stack_api import ViolationLevel CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", 
"fireworks"} diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py index b85a23263..dc7b7e1ad 100644 --- a/tests/integration/safety/test_vision_safety.py +++ b/tests/integration/safety/test_vision_safety.py @@ -9,6 +9,7 @@ import mimetypes import os import pytest + from llama_stack_api import ViolationLevel VISION_SHIELD_ENABLED_PROVIDERS = {"together"} diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py index 1b1b6ef28..036a5f018 100644 --- a/tests/integration/tool_runtime/test_registration.py +++ b/tests/integration/tool_runtime/test_registration.py @@ -7,9 +7,9 @@ import re import pytest -from llama_stack_api import ToolGroupNotFoundError from llama_stack.core.library_client import LlamaStackAsLibraryClient +from llama_stack_api import ToolGroupNotFoundError from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index c65dfecac..102f3f00c 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -8,12 +8,12 @@ import time from io import BytesIO import pytest -from llama_stack_api import Chunk, ExpiresAfter from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger +from llama_stack_api import Chunk, ExpiresAfter from ..conftest import vector_provider_wrapper diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py index acaa44bcb..29dbd3e56 100644 --- a/tests/integration/vector_io/test_vector_io.py +++ b/tests/integration/vector_io/test_vector_io.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
 import pytest
+
 from llama_stack_api import Chunk

 from ..conftest import vector_provider_wrapper
diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py
index 2f942eb9c..95c54d379 100644
--- a/tests/unit/conversations/test_conversations.py
+++ b/tests/unit/conversations/test_conversations.py
@@ -8,7 +8,6 @@ import tempfile
 from pathlib import Path

 import pytest
-from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage
 from openai.types.conversations.conversation import Conversation as OpenAIConversation
 from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem
 from pydantic import TypeAdapter
@@ -25,6 +24,7 @@ from llama_stack.core.storage.datatypes import (
     StorageConfig,
 )
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage


 @pytest.fixture
diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py
index 7e465513e..1b24a59a2 100644
--- a/tests/unit/core/routers/test_safety_router.py
+++ b/tests/unit/core/routers/test_safety_router.py
@@ -6,10 +6,9 @@

 from unittest.mock import AsyncMock

-from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield
-
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter
+from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield


 async def test_run_moderation_uses_default_shield_when_model_missing():
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index 071fbe6e7..202e2da1b 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, Mock

 import pytest
-from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody

 from llama_stack.core.routers.vector_io import VectorIORouter
+from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody


 async def test_single_provider_auto_selection():
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index acb31e1c9..462a25c8b 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -9,10 +9,10 @@
 from unittest.mock import AsyncMock

 import pytest
-from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield

 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
+from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield


 class TestVectorStoresValidation:
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index 2405d536e..8fd9d6ec3 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -9,6 +9,14 @@
 from unittest.mock import AsyncMock

 import pytest
+
+from llama_stack.core.datatypes import RegistryEntrySource
+from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
+from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable
+from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable
+from llama_stack.core.routing_tables.shields import ShieldsRoutingTable
+from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable
 from llama_stack_api import (
     URL,
     Api,
@@ -25,14 +33,6 @@ from llama_stack_api import (
     URIDataSource,
 )

-from llama_stack.core.datatypes import RegistryEntrySource
-from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
-from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable
-from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable
-from llama_stack.core.routing_tables.shields import ShieldsRoutingTable
-from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable
-

 class Impl:
     def __init__(self, api: Api):
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index f66b57df8..889f063e6 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -9,6 +9,14 @@ from pathlib import Path
 from unittest.mock import patch

 import pytest
+from openai import AsyncOpenAI
+
+from llama_stack.testing.api_recorder import (
+    APIRecordingMode,
+    ResponseStorage,
+    api_recording,
+    normalize_inference_request,
+)

 # Import the real Pydantic response types instead of using Mocks
 from llama_stack_api import (
@@ -19,14 +27,6 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
-from openai import AsyncOpenAI
-
-from llama_stack.testing.api_recorder import (
-    APIRecordingMode,
-    ResponseStorage,
-    api_recording,
-    normalize_inference_request,
-)


 @pytest.fixture
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index a27455e24..b8d6ba55d 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,7 +9,6 @@ from unittest.mock import patch

 import pytest
 import yaml
-from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError

 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -23,6 +22,7 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
+from llama_stack_api import ProviderSpec


 class SampleConfig(BaseModel):
@@ -395,9 +395,8 @@ pip_packages:

     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
-        from llama_stack_api import Api
-
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
+        from llama_stack_api import Api

         # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
         build_config = BuildConfig(
@@ -457,9 +456,8 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         fake_spec = ProviderSpec(
             api=Api.inference,
@@ -595,9 +593,8 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -644,9 +641,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -693,9 +689,8 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         # Module returns both inline and remote variants
         spec1 = ProviderSpec(
@@ -833,9 +828,8 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace

-        from llama_stack_api import ProviderSpec
-
         from llama_stack.core.distribution import get_external_providers_from_module
+        from llama_stack_api import ProviderSpec

         inference_spec = ProviderSpec(
             api=Api.inference,
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index 080d1ddbe..793f4edd3 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,7 +6,6 @@

 import pytest
-from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError

 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
 from llama_stack.providers.inline.files.localfs import (
@@ -15,6 +14,7 @@ from llama_stack.providers.inline.files.localfs import (
     LocalfsFilesImplConfig,
 )
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError


 class MockUploadFile:
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 3c93a578d..32d59234d 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,6 +58,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock

 import pytest
+
 from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError


diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index 4cd5d962d..acb7ca01c 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,6 +43,7 @@ Key Behaviors Tested:
 import asyncio

 import pytest
+
 from llama_stack_api import ConflictError


diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index ae63c1a78..de6c92e9c 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,6 +8,7 @@ from unittest.mock import patch

 import pytest
 from botocore.exceptions import ClientError
+
 from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError


diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index 873db4e27..e113611bd 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,10 +7,10 @@
 from unittest.mock import patch

 import pytest
-from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError

 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError


 async def test_listing_hides_other_users_file(s3_provider, sample_text_file):
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index b3eecc558..a20f2860a 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,11 +8,11 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError

 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody


 def test_adapter_initialization():
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index e2a5455b7..958895cc4 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,6 +9,11 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

 import pytest
+
+from llama_stack.core.routers.inference import InferenceRouter
+from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
+from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
 from llama_stack_api import (
     HealthStatus,
     Model,
@@ -22,11 +27,6 @@ from llama_stack_api import (
     ToolChoice,
 )

-from llama_stack.core.routers.inference import InferenceRouter
-from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
-from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
-
 # These are unit test for the remote vllm provider
 # implementation. This should only contain tests which are specific to
 # the implementation details of those classes. More general
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index 36d2b86a9..658132340 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,12 +7,12 @@
 from unittest.mock import AsyncMock

 import pytest
-from llama_stack_api import ToolDef

 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
 )
 from llama_stack.providers.inline.agents.meta_reference.responses.types import ChatCompletionContext
+from llama_stack_api import ToolDef


 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index 0d9f1cc35..36006cc39 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,10 +8,10 @@ import os
 from unittest.mock import patch

 import pytest
-from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource

 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource


 @pytest.fixture
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index c41379801..783d664bf 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,6 +8,10 @@ import os
 from unittest.mock import MagicMock, patch

 import pytest
+
+from llama_stack.models.llama.sku_types import CoreModelId
+from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
+from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
 from llama_stack_api import (
     Benchmark,
     BenchmarkConfig,
@@ -20,10 +24,6 @@ from llama_stack_api import (
     TopPSamplingStrategy,
 )

-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
-from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
-
 MOCK_DATASET_ID = "default/test-dataset"
 MOCK_BENCHMARK_ID = "test-benchmark"

diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index ba68a7abe..b714fc607 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,6 +9,12 @@ import warnings
 from unittest.mock import patch

 import pytest
+
+from llama_stack.core.library_client import convert_pydantic_to_json_value
+from llama_stack.providers.remote.post_training.nvidia.post_training import (
+    NvidiaPostTrainingAdapter,
+    NvidiaPostTrainingConfig,
+)
 from llama_stack_api import (
     DataConfig,
     DatasetFormat,
@@ -19,12 +25,6 @@ from llama_stack_api import (
     TrainingConfig,
 )

-from llama_stack.core.library_client import convert_pydantic_to_json_value
-from llama_stack.providers.remote.post_training.nvidia.post_training import (
-    NvidiaPostTrainingAdapter,
-    NvidiaPostTrainingConfig,
-)
-

 class TestNvidiaParameters:
     @pytest.fixture(autouse=True)
diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py
index 8b313abcd..ee62910b8 100644
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@@ -8,11 +8,11 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import aiohttp
 import pytest
-from llama_stack_api import ModelType

 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import ModelType


 class MockResponse:
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index ea6254841..07e04ddea 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -9,6 +9,9 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
+
+from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
+from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
 from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
@@ -18,9 +21,6 @@ from llama_stack_api import (
     ViolationLevel,
 )

-from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
-from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
-

 class FakeNVIDIASafetyAdapter(NVIDIASafetyAdapter):
     """Test implementation that provides the required shield_store."""
diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
index 4d0ce695b..94948da41 100644
--- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
+++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
@@ -9,15 +9,6 @@ import warnings
 from unittest.mock import patch

 import pytest
-from llama_stack_api import (
-    DataConfig,
-    DatasetFormat,
-    LoraFinetuningConfig,
-    OptimizerConfig,
-    OptimizerType,
-    QATFinetuningConfig,
-    TrainingConfig,
-)

 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
@@ -27,6 +18,15 @@ from llama_stack.providers.remote.post_training.nvidia.post_training import (
     NvidiaPostTrainingJob,
     NvidiaPostTrainingJobStatusResponse,
 )
+from llama_stack_api import (
+    DataConfig,
+    DatasetFormat,
+    LoraFinetuningConfig,
+    OptimizerConfig,
+    OptimizerType,
+    QATFinetuningConfig,
+    TrainingConfig,
+)


 @pytest.fixture
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
index df7453712..7126e1b69 100644
--- a/tests/unit/providers/test_bedrock.py
+++ b/tests/unit/providers/test_bedrock.py
@@ -7,10 +7,9 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch

-from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
-
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody


 def test_can_create_adapter():
diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py
index b9b59bb79..5b13a75f4 100644
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@@ -10,12 +10,12 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch

 import pytest
-from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
 from pydantic import BaseModel, Field

 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam


 class OpenAIMixinImpl(OpenAIMixin):
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
index a7c9289d7..ab5736ac5 100644
--- a/tests/unit/providers/utils/inference/test_prompt_adapter.py
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -4,12 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam
-
 from llama_stack.models.llama.datatypes import RawTextItem
 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_openai_message_to_raw_message,
 )
+from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam


 class TestConvertOpenAIMessageToRawMessage:
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 00db5795a..f3241ba20 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-from llama_stack_api import URL, RAGDocument, TextContentItem

 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
+from llama_stack_api import URL, RAGDocument, TextContentItem


 async def test_content_from_doc_with_url():
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 4a85cf8b8..1e3efafa1 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -34,9 +34,9 @@
 #

 import pytest
-from llama_stack_api import Model

 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
+from llama_stack_api import Model


 @pytest.fixture
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 216e9b8ea..6408e25ab 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore

 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
@@ -19,6 +18,7 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteV
 from llama_stack.providers.remote.vector_io.pgvector.config import PGVectorVectorIOConfig
 from llama_stack.providers.remote.vector_io.pgvector.pgvector import PGVectorIndex, PGVectorVectorIOAdapter
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore

 EMBEDDING_DIMENSION = 768
 COLLECTION_PREFIX = "test_collection"
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index 0d5c1399f..075296cbb 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,13 +9,13 @@ from unittest.mock import MagicMock, patch

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore

 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
     FaissIndex,
     FaissVectorIOAdapter,
 )
+from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore

 # This test is a unit test for the FaissVectorIOAdapter class. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 17a99ce1c..d1548cf37 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,13 +8,13 @@ import asyncio

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, QueryChunksResponse

 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
     SQLiteVecVectorIOAdapter,
     _create_sqlite_connection,
 )
+from llama_stack_api import Chunk, QueryChunksResponse

 # This test is a unit test for the SQLiteVecVectorIOAdapter class. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 7ba40eefb..3797abb2c 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -10,6 +10,8 @@ from unittest.mock import AsyncMock, patch

 import numpy as np
 import pytest
+
+from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
 from llama_stack_api import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
@@ -21,8 +23,6 @@ from llama_stack_api import (
     VectorStoreNotFoundError,
 )

-from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
-
 # This test is a unit test for the inline VectorIO providers. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
 # tests/integration/vector_io/
@@ -255,10 +255,9 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):

 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack_api import Chunk
-
     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+    from llama_stack_api import Chunk

     chunk = Chunk(content="test", chunk_id=generate_chunk_id("test", "test"), metadata={"document_id": 12345})
     with pytest.raises(TypeError) as exc_info:
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index 678b76fbd..7f6b4af79 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -4,9 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack_api import Chunk, ChunkMetadata
-
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack_api import Chunk, ChunkMetadata

 # This test is a unit test for the chunk_utils.py helpers. This should only contain
 # tests which are specific to this file. More general (API-level) tests should be placed in
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index e3f5e46d7..7eb17b74b 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -7,9 +7,9 @@
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig

 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig


 class TestRagQuery:
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 23c12dcab..2562df8d6 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -12,7 +12,6 @@ from unittest.mock import AsyncMock, MagicMock

 import numpy as np
 import pytest
-from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument

 from llama_stack.providers.utils.memory.vector_store import (
     URL,
@@ -22,6 +21,7 @@ from llama_stack.providers.utils.memory.vector_store import (
     make_overlapped_chunks,
 )
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
+from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument

 DUMMY_PDF_PATH = Path(os.path.abspath(__file__)).parent / "fixtures" / "dummy.pdf"
 # Depending on the machine, this can get parsed a couple of ways
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 01f486ab2..1b5032782 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -6,7 +6,6 @@

 import pytest
-from llama_stack_api import Model, VectorStore

 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
@@ -16,6 +15,7 @@ from llama_stack.core.store.registry import (
     DiskDistributionRegistry,
 )
 from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
+from llama_stack_api import Model, VectorStore


 @pytest.fixture
@@ -303,9 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry):

 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack_api import ModelType
-
     from llama_stack.core.datatypes import ModelWithOwner
+    from llama_stack_api import ModelType

     model1 = ModelWithOwner(
         identifier="test_model",
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index 2827f60b9..a09d2a30d 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,10 +5,9 @@
 # the root directory of this source tree.


-from llama_stack_api import ModelType
-
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry
+from llama_stack_api import ModelType


 async def test_registry_cache_with_acl(cached_disk_dist_registry):
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index 1df933d4d..23a9636d5 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -8,12 +8,12 @@ from unittest.mock import MagicMock, Mock, patch

 import pytest
 import yaml
-from llama_stack_api import Api, ModelType
 from pydantic import TypeAdapter, ValidationError

 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
+from llama_stack_api import Api, ModelType


 class AsyncMock(MagicMock):
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index 071178f96..8f8a61ea7 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,7 +9,6 @@ import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock

-from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec
 from pydantic import BaseModel, Field

 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -27,6 +26,7 @@ from llama_stack.core.storage.datatypes import (
 )
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec


 def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None:
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index fdaf9022b..d82743c80 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -9,9 +9,9 @@ import logging  # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock

 import pytest
-from llama_stack_api import PaginatedResponse

 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
+from llama_stack_api import PaginatedResponse


 @pytest.fixture
diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py
index 79e0b6e28..623955984 100644
--- a/tests/unit/tools/test_tools_json_schema.py
+++ b/tests/unit/tools/test_tools_json_schema.py
@@ -9,10 +9,10 @@ Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
 """

-from llama_stack_api import ToolDef
 from pydantic import ValidationError

 from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
+from llama_stack_api import ToolDef


 class TestToolDefValidation:
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index 4da20b125..bdcc529ce 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -7,6 +7,10 @@
 import time

 import pytest
+
+from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
+from llama_stack.providers.utils.inference.inference_store import InferenceStore
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
 from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
@@ -15,10 +19,6 @@ from llama_stack_api import (
     Order,
 )

-from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
-from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
-

 @pytest.fixture(autouse=True)
 def setup_backends(tmp_path):
diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py
index 1119a93d8..8c108d9c1 100644
--- a/tests/unit/utils/responses/test_responses_store.py
+++ b/tests/unit/utils/responses/test_responses_store.py
@@ -9,11 +9,11 @@ from tempfile import TemporaryDirectory
 from uuid import uuid4

 import pytest
-from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order

 from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order


 def build_store(db_path: str, policy: list | None = None) -> ResponsesStore:
diff --git a/uv.lock b/uv.lock
index ddf8c1cd4..7e0575df6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2095,7 +2095,7 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
-    { name = "llama-stack-api", editable = "src/llama-stack-api" },
+    { name = "llama-stack-api", editable = "src/llama_stack_api" },
     { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
@@ -2231,7 +2231,7 @@ unit = [
 [[package]]
 name = "llama-stack-api"
 version = "0.1.0"
-source = { editable = "src/llama-stack-api" }
+source = { editable = "src/llama_stack_api" }
 dependencies = [
     { name = "jsonschema" },
     { name = "opentelemetry-exporter-otlp-proto-http" },