diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 74f7da19a..ac125bba5 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -53,7 +53,7 @@ jobs:
         working-directory: src/llama_stack_ui

       - name: Install pre-commit
-        run: python -m pip install pre-commit
+        run: python -m pip install 'pre-commit>=4.4.0'

       - name: Cache pre-commit
         uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index c605a30c3..b0f2c6e69 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -30,13 +30,16 @@ jobs:
           activate-environment: true
           version: 0.7.6

-      - name: Build Llama Stack package
-        run: |
-          uv build
+      - name: Build Llama Stack API package
+        working-directory: src/llama-stack-api
+        run: uv build

-      - name: Install Llama Stack package
+      - name: Build Llama Stack package
+        run: uv build
+
+      - name: Install Llama Stack package (with api stubs from local build)
         run: |
-          uv pip install dist/*.whl
+          uv pip install --find-links src/llama-stack-api/dist dist/*.whl

       - name: Verify Llama Stack package
         run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 42cd2f5ce..6f4dd6a0e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,5 @@
 exclude: 'build/'
-
+minimum_pre_commit_version: 4.4.0
 default_language_version:
     python: python3.12
     node: "22"
@@ -42,7 +42,7 @@ repos:
       hooks:
         - id: ruff
           args: [ --fix ]
-          exclude: ^src/llama_stack/strong_typing/.*$
+          exclude: ^(src/llama_stack/strong_typing/.*|src/llama-stack-api/llama_stack_api/strong_typing/.*)$
         - id: ruff-format

   - repo: https://github.com/adamchainz/blacken-docs
diff --git a/docs/docs/concepts/apis/external.mdx b/docs/docs/concepts/apis/external.mdx
index 42819a4ac..005b85647 100644
--- a/docs/docs/concepts/apis/external.mdx
+++ b/docs/docs/concepts/apis/external.mdx
@@ -58,7 +58,7 @@ External APIs must expose a `available_providers()` function in their module tha

 ```python
 # llama_stack_api_weather/api.py
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec


 def available_providers() -> list[ProviderSpec]:
@@ -79,7 +79,7 @@ A Protocol class like so:
 # llama_stack_api_weather/api.py
 from typing import Protocol

-from llama_stack.schema_utils import webmethod
+from llama_stack_api import webmethod


 class WeatherAPI(Protocol):
@@ -151,13 +151,12 @@ __all__ = ["WeatherAPI", "available_providers"]
 # llama-stack-api-weather/src/llama_stack_api_weather/weather.py
 from typing import Protocol

-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     ProviderSpec,
     RemoteProviderSpec,
+    webmethod,
 )
-from llama_stack.schema_utils import webmethod
-

 def available_providers() -> list[ProviderSpec]:
     return [
diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx
index c4a01bf7d..532ffaaf0 100644
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@@ -65,7 +65,7 @@ external_providers_dir: /workspace/providers.d
 Inside `providers.d/custom_ollama/provider.py`, define `get_provider_spec()` so the CLI can discover dependencies:

 ```python
-from llama_stack.providers.datatypes import ProviderSpec
+from llama_stack_api import ProviderSpec


 def get_provider_spec() -> ProviderSpec:
diff --git a/docs/docs/providers/external/external-providers-guide.mdx b/docs/docs/providers/external/external-providers-guide.mdx
index 748fd62c0..dc813c75b 100644
--- a/docs/docs/providers/external/external-providers-guide.mdx
+++ b/docs/docs/providers/external/external-providers-guide.mdx
@@ -80,7 +80,7 @@ container_image: custom-vector-store:latest # optional
 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:

 ```python
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     ProviderSpec,
     Api,
     RemoteProviderSpec,
diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
index bfa2f29de..45631dff3 100644
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@@ -153,7 +153,7 @@ description: |
   Example using RAGQueryConfig with different search modes:

   ```python
-  from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+  from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

   # Vector search
   config = RAGQueryConfig(mode="vector", max_chunks=5)
@@ -358,7 +358,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:

 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker

 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 65720df4a..769db32a7 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -16,7 +16,7 @@ import sys
 import fire
 import ruamel.yaml as yaml

-from llama_stack.apis.version import LLAMA_STACK_API_V1  # noqa: E402
+from llama_stack_api import LLAMA_STACK_API_V1  # noqa: E402
 from llama_stack.core.stack import LlamaStack  # noqa: E402

 from .pyopenapi.options import Options  # noqa: E402
diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py
index 30fc9038d..afbb5c710 100644
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -16,27 +16,27 @@ from typing import Annotated, Any, Dict, get_args, get_origin, Set, Union

 from fastapi import UploadFile

-from llama_stack.apis.datatypes import Error
-from llama_stack.strong_typing.core import JsonType
-from llama_stack.strong_typing.docstring import Docstring, parse_type
-from llama_stack.strong_typing.inspection import (
+from llama_stack_api import (
+    Docstring,
+    Error,
+    JsonSchemaGenerator,
+    JsonType,
+    Schema,
+    SchemaOptions,
+    get_schema_identifier,
     is_generic_list,
     is_type_optional,
     is_type_union,
     is_unwrapped_body_param,
+    json_dump_string,
+    object_to_json,
+    parse_type,
+    python_type_to_name,
+    register_schema,
     unwrap_generic_list,
     unwrap_optional_type,
     unwrap_union_types,
 )
-from llama_stack.strong_typing.name import python_type_to_name
-from llama_stack.strong_typing.schema import (
-    get_schema_identifier,
-    JsonSchemaGenerator,
-    register_schema,
-    Schema,
-    SchemaOptions,
-)
-from llama_stack.strong_typing.serialization import json_dump_string, object_to_json
 from pydantic import BaseModel
from .operations import ( diff --git a/docs/openapi_generator/pyopenapi/operations.py b/docs/openapi_generator/pyopenapi/operations.py index a1c95c7a7..42a554f2c 100644 --- a/docs/openapi_generator/pyopenapi/operations.py +++ b/docs/openapi_generator/pyopenapi/operations.py @@ -11,19 +11,21 @@ import typing from dataclasses import dataclass from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA, LLAMA_STACK_API_V1ALPHA - from termcolor import colored -from llama_stack.strong_typing.inspection import get_signature - from typing import get_origin, get_args from fastapi import UploadFile from fastapi.params import File, Form from typing import Annotated -from llama_stack.schema_utils import ExtraBodyField +from llama_stack_api import ( + ExtraBodyField, + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, + get_signature, +) def split_prefix( diff --git a/docs/openapi_generator/pyopenapi/specification.py b/docs/openapi_generator/pyopenapi/specification.py index 90bf54316..bfa35f539 100644 --- a/docs/openapi_generator/pyopenapi/specification.py +++ b/docs/openapi_generator/pyopenapi/specification.py @@ -9,7 +9,7 @@ import enum from dataclasses import dataclass from typing import Any, ClassVar, Dict, List, Optional, Union -from llama_stack.strong_typing.schema import JsonType, Schema, StrictJsonType +from llama_stack_api import JsonType, Schema, StrictJsonType URL = str diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index c1425b250..762249eb8 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -11,8 +11,7 @@ from pathlib import Path from typing import Any, List, Optional, TextIO, Union, get_type_hints, get_origin, get_args from pydantic import BaseModel -from llama_stack.strong_typing.schema import object_to_json, StrictJsonType -from llama_stack.strong_typing.inspection import is_unwrapped_body_param +from llama_stack_api import StrictJsonType, is_unwrapped_body_param, object_to_json from llama_stack.core.resolver import api_protocol_map from .generator import Generator @@ -165,12 +164,12 @@ def _validate_api_delete_method_returns_none(method) -> str | None: return "has no return type annotation" return_type = hints['return'] - + # Allow OpenAI endpoints to return response objects since they follow OpenAI specification method_name = getattr(method, '__name__', '') if method_name.__contains__('openai_'): return None - + if return_type is not None and return_type is not type(None): return "does not return None where None is mandatory" diff --git a/pyproject.toml b/pyproject.toml index e6808af8a..d287b4be7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "httpx", "jinja2>=3.1.6", "jsonschema", + "llama-stack-api", # API and provider specifications (local dev via tool.uv.sources) "openai>=2.5.0", "prompt-toolkit", "python-dotenv", @@ -69,7 +70,7 @@ dev = [ "black", "ruff", "mypy", - "pre-commit", + "pre-commit>=4.4.0", "ruamel.yaml", # needed for openapi generator ] # Type checking dependencies - includes type stubs and optional runtime dependencies @@ -180,7 +181,7 @@ install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_p [tool.setuptools.packages.find] where = ["src"] -include = ["llama_stack", "llama_stack.*"] +include = ["llama_stack", "llama_stack.*", "llama-stack-api", 
"llama-stack-api.*"] [[tool.uv.index]] name = "pytorch-cpu" @@ -190,6 +191,7 @@ explicit = true [tool.uv.sources] torch = [{ index = "pytorch-cpu" }] torchvision = [{ index = "pytorch-cpu" }] +llama-stack-api = [{ path = "src/llama-stack-api", editable = true }] [tool.ruff] line-length = 120 @@ -256,8 +258,8 @@ unfixable = [ ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API [tool.mypy] -mypy_path = ["src"] -packages = ["llama_stack"] +mypy_path = ["src", "src/llama-stack-api"] +packages = ["llama_stack", "llama_stack_api"] plugins = ['pydantic.mypy'] disable_error_code = [] warn_return_any = true @@ -279,15 +281,18 @@ exclude = [ "^src/llama_stack/core/store/registry\\.py$", "^src/llama_stack/core/utils/exec\\.py$", "^src/llama_stack/core/utils/prompt_for_config\\.py$", + # Moved to llama-stack-api but still excluded "^src/llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$", - "^src/llama_stack/providers/inline/datasetio/localfs/", - "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", - "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/", + "^src/llama-stack-api/llama_stack_api/core/telemetry/telemetry\\.py$", + "^src/llama_stack/providers/inline/agents/meta_reference/", + "^src/llama_stack/providers/inline/datasetio/localfs/", + "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$", + "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", "^src/llama_stack/providers/inline/post_training/common/validator\\.py$", "^src/llama_stack/providers/inline/safety/code_scanner/", @@ -337,7 +342,9 @@ exclude = [ "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$", - "^src/llama_stack/strong_typing/auxiliary\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/trace_protocol\\.py$", + "^src/llama-stack-api/llama_stack_api/core/telemetry/tracing\\.py$", + "^src/llama-stack-api/llama_stack_api/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$", ] diff --git a/scripts/generate_prompt_format.py b/scripts/generate_prompt_format.py index 855033f95..8099a3f0d 100755 --- a/scripts/generate_prompt_format.py +++ b/scripts/generate_prompt_format.py @@ -14,8 +14,8 @@ import os from pathlib import Path import fire +from llama_stack_api import ModelNotFoundError -from llama_stack.apis.common.errors import ModelNotFoundError from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama4.generation import Llama4 from llama_stack.models.llama.sku_list import resolve_model diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index de79b4d17..d62d626ad 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -22,7 +22,7 @@ def get_api_docstring(api_name: str) -> str | None: """Extract docstring from the API protocol class.""" try: # Import the API module dynamically - api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()]) + api_module = 
__import__(f"llama_stack_api.{api_name}", fromlist=[api_name.title()]) # Get the main protocol class (usually capitalized API name) protocol_class_name = api_name.title() @@ -83,8 +83,9 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: # this string replace is ridiculous field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") - field_type = field_type.replace("llama_stack.apis.inference.inference.", "") + field_type = field_type.replace("llama_stack_api.inference.", "") field_type = field_type.replace("llama_stack.providers.", "") + field_type = field_type.replace("llama_stack_api.datatypes.", "") default_value = field.default if field.default_factory is not None: diff --git a/src/llama-stack-api/README.md b/src/llama-stack-api/README.md new file mode 100644 index 000000000..aa6b05722 --- /dev/null +++ b/src/llama-stack-api/README.md @@ -0,0 +1,103 @@ +# llama-stack-api + +API and Provider specifications for Llama Stack - a lightweight package with protocol definitions and provider specs. + +## Overview + +`llama-stack-api` is a minimal dependency package that contains: + +- **API Protocol Definitions**: Type-safe protocol definitions for all Llama Stack APIs (inference, agents, safety, etc.) +- **Provider Specifications**: Provider spec definitions for building custom providers +- **Data Types**: Shared data types and models used across the Llama Stack ecosystem +- **Type Utilities**: Strong typing utilities and schema validation + +## What This Package Does NOT Include + +- Server implementation (see `llama-stack` package) +- Provider implementations (see `llama-stack` package) +- CLI tools (see `llama-stack` package) +- Runtime orchestration (see `llama-stack` package) + +## Use Cases + +This package is designed for: + +1. **Third-party Provider Developers**: Build custom providers without depending on the full Llama Stack server +2. **Client Library Authors**: Use type definitions without server dependencies +3. **Documentation Generation**: Generate API docs from protocol definitions +4. 
+
+## Installation
+
+```bash
+pip install llama-stack-api
+```
+
+Or with uv:
+
+```bash
+uv pip install llama-stack-api
+```
+
+## Dependencies
+
+Minimal dependencies:
+- `pydantic>=2.11.9` - For data validation and serialization
+- `jsonschema` - For JSON schema utilities
+
+## Versioning
+
+This package follows semantic versioning independently from the main `llama-stack` package:
+
+- **Patch versions** (0.4.x): Documentation, internal improvements
+- **Minor versions** (0.x.0): New APIs, backward-compatible changes
+- **Major versions** (x.0.0): Breaking changes to existing APIs
+
+Current version: **0.4.0**
+
+## Usage Example
+
+```python
+from llama_stack_api import (
+    Api,
+    Inference,
+    InlineProviderSpec,
+    OpenAIChatCompletionRequestWithExtraBody,
+)
+
+
+# Use protocol definitions for type checking
+class MyInferenceProvider(Inference):
+    async def openai_chat_completion(
+        self, params: OpenAIChatCompletionRequestWithExtraBody
+    ):
+        # Your implementation
+        pass
+
+
+# Define provider specifications
+my_provider_spec = InlineProviderSpec(
+    api=Api.inference,
+    provider_type="inline::my-provider",
+    pip_packages=["my-dependencies"],
+    module="my_package.providers.inference",
+    config_class="my_package.providers.inference.MyConfig",
+)
+```
+
+## Relationship to llama-stack
+
+The main `llama-stack` package depends on `llama-stack-api` and provides:
+- Full server implementation
+- Built-in provider implementations
+- CLI tools for running and managing stacks
+- Runtime provider resolution and orchestration
+
+## Contributing
+
+See the main [Llama Stack repository](https://github.com/llamastack/llama-stack) for contribution guidelines.
+
+## License
+
+MIT License - see LICENSE file for details.
+
+## Links
+
+- [Main Llama Stack Repository](https://github.com/llamastack/llama-stack)
+- [Documentation](https://llamastack.ai/)
+- [Client Library](https://pypi.org/project/llama-stack-client/)
diff --git a/src/llama-stack-api/llama_stack_api/__init__.py b/src/llama-stack-api/llama_stack_api/__init__.py
new file mode 100644
index 000000000..8bbe9f8bd
--- /dev/null
+++ b/src/llama-stack-api/llama_stack_api/__init__.py
@@ -0,0 +1,871 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+"""
+Llama Stack API Specifications
+
+This package contains the API definitions, data types, and protocol specifications
+for Llama Stack. It is designed to be a lightweight dependency for external providers
+and clients that need to interact with Llama Stack APIs without requiring the full
+server implementation.
+
+All imports from this package MUST use the form:
+    from llama_stack_api import <symbol>
+
+Sub-module imports (e.g., from llama_stack_api.agents import Agents) are NOT supported
+and are considered a code smell. All exported symbols are explicitly listed in __all__.
+"""
+
+__version__ = "0.4.0"
+
+# Import submodules for those who need them
+from .
import common, strong_typing # noqa: F401 + +# Import all public API symbols +from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec +from .batches import Batches, BatchObject, ListBatchesResponse +from .benchmarks import ( + Benchmark, + BenchmarkInput, + Benchmarks, + CommonBenchmarkFields, + ListBenchmarksResponse, +) + +# Import commonly used types from common submodule +from .common.content_types import ( + URL, + ImageContentItem, + InterleavedContent, + InterleavedContentItem, + TextContentItem, + _URLOrData, +) +from .common.errors import ( + ConflictError, + DatasetNotFoundError, + InvalidConversationIdError, + ModelNotFoundError, + ModelTypeError, + ResourceNotFoundError, + TokenValidationError, + ToolGroupNotFoundError, + UnsupportedModelError, + VectorStoreNotFoundError, +) +from .common.job_types import Job, JobStatus +from .common.responses import Order, PaginatedResponse +from .common.training_types import Checkpoint, PostTrainingMetric +from .common.type_system import ( + ChatCompletionInputType, + CompletionInputType, + NumberType, + ParamType, + StringType, +) +from .conversations import ( + Conversation, + ConversationDeletedResource, + ConversationItem, + ConversationItemCreateRequest, + ConversationItemDeletedResource, + ConversationItemInclude, + ConversationItemList, + ConversationMessage, + Conversations, + Metadata, +) +from .datasetio import DatasetIO, DatasetStore +from .datasets import ( + CommonDatasetFields, + Dataset, + DatasetInput, + DatasetPurpose, + Datasets, + DatasetType, + DataSource, + ListDatasetsResponse, + RowsDataSource, + URIDataSource, +) +from .datatypes import ( + Api, + BenchmarksProtocolPrivate, + DatasetsProtocolPrivate, + DynamicApiMeta, + Error, + ExternalApiSpec, + HealthResponse, + HealthStatus, + InlineProviderSpec, + ModelsProtocolPrivate, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + RoutingTable, + ScoringFunctionsProtocolPrivate, + ShieldsProtocolPrivate, + ToolGroupsProtocolPrivate, + VectorStoresProtocolPrivate, +) +from .eval import BenchmarkConfig, Eval, EvalCandidate, EvaluateResponse, ModelCandidate +from .files import ( + ExpiresAfter, + Files, + ListOpenAIFileResponse, + OpenAIFileDeleteResponse, + OpenAIFileObject, + OpenAIFilePurpose, +) +from .inference import ( + Bf16QuantizationConfig, + ChatCompletionResponseEventType, + CompletionRequest, + EmbeddingsResponse, + EmbeddingTaskType, + Fp8QuantizationConfig, + GrammarResponseFormat, + GreedySamplingStrategy, + Inference, + InferenceProvider, + Int4QuantizationConfig, + JsonSchemaResponseFormat, + ListOpenAIChatCompletionResponse, + LogProbConfig, + ModelStore, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionMessageContent, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionTextOnlyMessageContent, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChatCompletionUsage, + OpenAIChatCompletionUsageCompletionTokensDetails, + OpenAIChatCompletionUsagePromptTokensDetails, + OpenAIChoice, + OpenAIChoiceDelta, + OpenAIChoiceLogprobs, + OpenAIChunkChoice, + OpenAICompletion, + OpenAICompletionChoice, + OpenAICompletionLogprobs, + OpenAICompletionRequestWithExtraBody, + OpenAICompletionWithInputMessages, + OpenAIDeveloperMessageParam, + OpenAIEmbeddingData, + OpenAIEmbeddingsRequestWithExtraBody, + 
OpenAIEmbeddingsResponse, + OpenAIEmbeddingUsage, + OpenAIFile, + OpenAIFileFile, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, + OpenAISystemMessageParam, + OpenAITokenLogProb, + OpenAIToolMessageParam, + OpenAITopLogProb, + OpenAIUserMessageParam, + QuantizationConfig, + QuantizationType, + RerankData, + RerankResponse, + ResponseFormat, + ResponseFormatType, + SamplingParams, + SamplingStrategy, + SystemMessage, + SystemMessageBehavior, + TextTruncation, + TokenLogProbs, + ToolChoice, + ToolResponseMessage, + TopKSamplingStrategy, + TopPSamplingStrategy, + UserMessage, +) +from .inspect import ( + ApiFilter, + HealthInfo, + Inspect, + ListRoutesResponse, + RouteInfo, + VersionInfo, +) +from .models import ( + CommonModelFields, + ListModelsResponse, + Model, + ModelInput, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) +from .openai_responses import ( + AllowedToolsFilter, + ApprovalFilter, + ListOpenAIResponseInputItem, + ListOpenAIResponseObject, + MCPListToolsTool, + OpenAIDeleteResponseObject, + OpenAIResponseAnnotationCitation, + OpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseAnnotationFileCitation, + OpenAIResponseAnnotationFilePath, + OpenAIResponseAnnotations, + OpenAIResponseContentPart, + OpenAIResponseContentPartOutputText, + OpenAIResponseContentPartReasoningSummary, + OpenAIResponseContentPartReasoningText, + OpenAIResponseContentPartRefusal, + OpenAIResponseError, + OpenAIResponseInput, + OpenAIResponseInputFunctionToolCallOutput, + OpenAIResponseInputMessageContent, + OpenAIResponseInputMessageContentFile, + OpenAIResponseInputMessageContentImage, + OpenAIResponseInputMessageContentText, + OpenAIResponseInputTool, + OpenAIResponseInputToolFileSearch, + OpenAIResponseInputToolFunction, + OpenAIResponseInputToolMCP, + OpenAIResponseInputToolWebSearch, + OpenAIResponseMCPApprovalRequest, + OpenAIResponseMCPApprovalResponse, + OpenAIResponseMessage, + OpenAIResponseObject, + OpenAIResponseObjectStream, + OpenAIResponseObjectStreamResponseCompleted, + OpenAIResponseObjectStreamResponseContentPartAdded, + OpenAIResponseObjectStreamResponseContentPartDone, + OpenAIResponseObjectStreamResponseCreated, + OpenAIResponseObjectStreamResponseFailed, + OpenAIResponseObjectStreamResponseFileSearchCallCompleted, + OpenAIResponseObjectStreamResponseFileSearchCallInProgress, + OpenAIResponseObjectStreamResponseFileSearchCallSearching, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, + OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, + OpenAIResponseObjectStreamResponseIncomplete, + OpenAIResponseObjectStreamResponseInProgress, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, + OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, + OpenAIResponseObjectStreamResponseMcpCallCompleted, + OpenAIResponseObjectStreamResponseMcpCallFailed, + OpenAIResponseObjectStreamResponseMcpCallInProgress, + OpenAIResponseObjectStreamResponseMcpListToolsCompleted, + OpenAIResponseObjectStreamResponseMcpListToolsFailed, + OpenAIResponseObjectStreamResponseMcpListToolsInProgress, + OpenAIResponseObjectStreamResponseOutputItemAdded, + OpenAIResponseObjectStreamResponseOutputItemDone, + OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded, + OpenAIResponseObjectStreamResponseOutputTextDelta, + OpenAIResponseObjectStreamResponseOutputTextDone, + 
OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded, + OpenAIResponseObjectStreamResponseReasoningSummaryPartDone, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta, + OpenAIResponseObjectStreamResponseReasoningSummaryTextDone, + OpenAIResponseObjectStreamResponseReasoningTextDelta, + OpenAIResponseObjectStreamResponseReasoningTextDone, + OpenAIResponseObjectStreamResponseRefusalDelta, + OpenAIResponseObjectStreamResponseRefusalDone, + OpenAIResponseObjectStreamResponseWebSearchCallCompleted, + OpenAIResponseObjectStreamResponseWebSearchCallInProgress, + OpenAIResponseObjectStreamResponseWebSearchCallSearching, + OpenAIResponseObjectWithInput, + OpenAIResponseOutput, + OpenAIResponseOutputMessageContent, + OpenAIResponseOutputMessageContentOutputText, + OpenAIResponseOutputMessageFileSearchToolCall, + OpenAIResponseOutputMessageFileSearchToolCallResults, + OpenAIResponseOutputMessageFunctionToolCall, + OpenAIResponseOutputMessageMCPCall, + OpenAIResponseOutputMessageMCPListTools, + OpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponsePrompt, + OpenAIResponseText, + OpenAIResponseTextFormat, + OpenAIResponseTool, + OpenAIResponseToolMCP, + OpenAIResponseUsage, + OpenAIResponseUsageInputTokensDetails, + OpenAIResponseUsageOutputTokensDetails, + WebSearchToolTypes, +) +from .post_training import ( + AlgorithmConfig, + DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, + EfficiencyConfig, + ListPostTrainingJobsResponse, + LoraFinetuningConfig, + OptimizerConfig, + OptimizerType, + PostTraining, + PostTrainingJob, + PostTrainingJobArtifactsResponse, + PostTrainingJobLogStream, + PostTrainingJobStatusResponse, + PostTrainingRLHFRequest, + QATFinetuningConfig, + RLHFAlgorithm, + TrainingConfig, +) +from .prompts import ListPromptsResponse, Prompt, Prompts +from .providers import ListProvidersResponse, ProviderInfo, Providers +from .rag_tool import ( + DefaultRAGQueryGeneratorConfig, + LLMRAGQueryGeneratorConfig, + RAGDocument, + RAGQueryConfig, + RAGQueryGenerator, + RAGQueryGeneratorConfig, + RAGQueryResult, + RAGSearchMode, + Ranker, + RRFRanker, + WeightedRanker, +) +from .resource import Resource, ResourceType +from .safety import ( + ModerationObject, + ModerationObjectResults, + RunShieldResponse, + Safety, + SafetyViolation, + ShieldStore, + ViolationLevel, +) +from .schema_utils import ( + CallableT, + ExtraBodyField, + WebMethod, + json_schema_type, + register_schema, + webmethod, +) +from .scoring import ( + ScoreBatchResponse, + ScoreResponse, + Scoring, + ScoringFunctionStore, + ScoringResult, + ScoringResultRow, +) +from .scoring_functions import ( + AggregationFunctionType, + BasicScoringFnParams, + CommonScoringFnFields, + ListScoringFunctionsResponse, + LLMAsJudgeScoringFnParams, + RegexParserScoringFnParams, + ScoringFn, + ScoringFnInput, + ScoringFnParams, + ScoringFnParamsType, + ScoringFunctions, +) +from .shields import ( + CommonShieldFields, + ListShieldsResponse, + Shield, + ShieldInput, + Shields, +) + +# Import from strong_typing +from .strong_typing.core import JsonType +from .strong_typing.docstring import Docstring, parse_type +from .strong_typing.inspection import ( + get_signature, + is_generic_list, + is_type_optional, + is_type_union, + is_unwrapped_body_param, + unwrap_generic_list, + unwrap_optional_type, + unwrap_union_types, +) +from .strong_typing.name import python_type_to_name +from .strong_typing.schema import ( + JsonSchemaGenerator, + Schema, + SchemaOptions, + StrictJsonType, + get_schema_identifier, +) +from 
.strong_typing.serialization import json_dump_string, object_to_json +from .tools import ( + ListToolDefsResponse, + ListToolGroupsResponse, + SpecialToolGroup, + ToolDef, + ToolGroup, + ToolGroupInput, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + ToolStore, +) +from .vector_io import ( + Chunk, + ChunkMetadata, + OpenAICreateVectorStoreFileBatchRequestWithExtraBody, + OpenAICreateVectorStoreRequestWithExtraBody, + QueryChunksResponse, + SearchRankingOptions, + VectorIO, + VectorStoreChunkingStrategy, + VectorStoreChunkingStrategyAuto, + VectorStoreChunkingStrategyStatic, + VectorStoreChunkingStrategyStaticConfig, + VectorStoreContent, + VectorStoreCreateRequest, + VectorStoreDeleteResponse, + VectorStoreFileBatchObject, + VectorStoreFileContentResponse, + VectorStoreFileCounts, + VectorStoreFileDeleteResponse, + VectorStoreFileLastError, + VectorStoreFileObject, + VectorStoreFilesListInBatchResponse, + VectorStoreFileStatus, + VectorStoreListFilesResponse, + VectorStoreListResponse, + VectorStoreModifyRequest, + VectorStoreObject, + VectorStoreSearchRequest, + VectorStoreSearchResponse, + VectorStoreSearchResponsePage, + VectorStoreTable, +) +from .vector_stores import VectorStore, VectorStoreInput +from .version import ( + LLAMA_STACK_API_V1, + LLAMA_STACK_API_V1ALPHA, + LLAMA_STACK_API_V1BETA, +) + +__all__ = [ + # Submodules + "common", + "strong_typing", + # Version constants + "LLAMA_STACK_API_V1", + "LLAMA_STACK_API_V1ALPHA", + "LLAMA_STACK_API_V1BETA", + # API Symbols + "Agents", + "AggregationFunctionType", + "AlgorithmConfig", + "AllowedToolsFilter", + "Api", + "ApiFilter", + "ApprovalFilter", + "BasicScoringFnParams", + "Batches", + "BatchObject", + "Benchmark", + "BenchmarkConfig", + "BenchmarkInput", + "Benchmarks", + "BenchmarksProtocolPrivate", + "Bf16QuantizationConfig", + "CallableT", + "ChatCompletionInputType", + "ChatCompletionResponseEventType", + "Checkpoint", + "Chunk", + "ChunkMetadata", + "CommonBenchmarkFields", + "ConflictError", + "CommonDatasetFields", + "CommonModelFields", + "CommonScoringFnFields", + "CommonShieldFields", + "CompletionInputType", + "CompletionRequest", + "Conversation", + "ConversationDeletedResource", + "ConversationItem", + "ConversationItemCreateRequest", + "ConversationItemDeletedResource", + "ConversationItemInclude", + "ConversationItemList", + "ConversationMessage", + "Conversations", + "DPOAlignmentConfig", + "DPOLossType", + "DataConfig", + "DataSource", + "Dataset", + "DatasetFormat", + "DatasetIO", + "DatasetInput", + "DatasetPurpose", + "DatasetNotFoundError", + "DatasetStore", + "DatasetType", + "Datasets", + "DatasetsProtocolPrivate", + "DefaultRAGQueryGeneratorConfig", + "Docstring", + "DynamicApiMeta", + "EfficiencyConfig", + "EmbeddingTaskType", + "EmbeddingsResponse", + "Error", + "Eval", + "EvalCandidate", + "EvaluateResponse", + "ExpiresAfter", + "ExternalApiSpec", + "ExtraBodyField", + "Files", + "Fp8QuantizationConfig", + "get_schema_identifier", + "get_signature", + "GrammarResponseFormat", + "GreedySamplingStrategy", + "HealthInfo", + "HealthResponse", + "HealthStatus", + "ImageContentItem", + "Inference", + "InferenceProvider", + "InlineProviderSpec", + "Inspect", + "Int4QuantizationConfig", + "InterleavedContent", + "InterleavedContentItem", + "InvalidConversationIdError", + "is_generic_list", + "is_type_optional", + "is_type_union", + "is_unwrapped_body_param", + "Job", + "JobStatus", + "json_dump_string", + "json_schema_type", + "JsonSchemaGenerator", + "JsonSchemaResponseFormat", + "JsonType", + 
"LLMAsJudgeScoringFnParams", + "LLMRAGQueryGeneratorConfig", + "ListBatchesResponse", + "ListBenchmarksResponse", + "ListDatasetsResponse", + "ListModelsResponse", + "ListOpenAIChatCompletionResponse", + "ListOpenAIFileResponse", + "ListOpenAIResponseInputItem", + "ListOpenAIResponseObject", + "ListPostTrainingJobsResponse", + "ListPromptsResponse", + "ListProvidersResponse", + "ListRoutesResponse", + "ListScoringFunctionsResponse", + "ListShieldsResponse", + "ListToolDefsResponse", + "ListToolGroupsResponse", + "LogProbConfig", + "LoraFinetuningConfig", + "MCPListToolsTool", + "Metadata", + "Model", + "ModelCandidate", + "ModelInput", + "ModelNotFoundError", + "ModelStore", + "ModelType", + "ModelTypeError", + "Models", + "ModelsProtocolPrivate", + "ModerationObject", + "ModerationObjectResults", + "NumberType", + "object_to_json", + "OpenAIAssistantMessageParam", + "OpenAIChatCompletion", + "OpenAIChatCompletionChunk", + "OpenAIChatCompletionContentPartImageParam", + "OpenAIChatCompletionContentPartParam", + "OpenAIChatCompletionContentPartTextParam", + "OpenAIChatCompletionMessageContent", + "OpenAIChatCompletionRequestWithExtraBody", + "OpenAIChatCompletionTextOnlyMessageContent", + "OpenAIChatCompletionToolCall", + "OpenAIChatCompletionToolCallFunction", + "OpenAIChatCompletionUsage", + "OpenAIChatCompletionUsageCompletionTokensDetails", + "OpenAIChatCompletionUsagePromptTokensDetails", + "OpenAIChoice", + "OpenAIChoiceDelta", + "OpenAIChoiceLogprobs", + "OpenAIChunkChoice", + "OpenAICompletion", + "OpenAICompletionChoice", + "OpenAICompletionLogprobs", + "OpenAICompletionRequestWithExtraBody", + "OpenAICompletionWithInputMessages", + "OpenAICreateVectorStoreFileBatchRequestWithExtraBody", + "OpenAICreateVectorStoreRequestWithExtraBody", + "OpenAIDeleteResponseObject", + "OpenAIDeveloperMessageParam", + "OpenAIEmbeddingData", + "OpenAIEmbeddingUsage", + "OpenAIEmbeddingsRequestWithExtraBody", + "OpenAIEmbeddingsResponse", + "OpenAIFile", + "OpenAIFileDeleteResponse", + "OpenAIFileFile", + "OpenAIFileObject", + "OpenAIFilePurpose", + "OpenAIImageURL", + "OpenAIJSONSchema", + "OpenAIListModelsResponse", + "OpenAIMessageParam", + "OpenAIModel", + "Order", + "OpenAIResponseAnnotationCitation", + "OpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseAnnotationFileCitation", + "OpenAIResponseAnnotationFilePath", + "OpenAIResponseAnnotations", + "OpenAIResponseContentPart", + "OpenAIResponseContentPartOutputText", + "OpenAIResponseContentPartReasoningSummary", + "OpenAIResponseContentPartReasoningText", + "OpenAIResponseContentPartRefusal", + "OpenAIResponseError", + "OpenAIResponseFormatJSONObject", + "OpenAIResponseFormatJSONSchema", + "OpenAIResponseFormatParam", + "OpenAIResponseFormatText", + "OpenAIResponseInput", + "OpenAIResponseInputFunctionToolCallOutput", + "OpenAIResponseInputMessageContent", + "OpenAIResponseInputMessageContentFile", + "OpenAIResponseInputMessageContentImage", + "OpenAIResponseInputMessageContentText", + "OpenAIResponseInputTool", + "OpenAIResponseInputToolFileSearch", + "OpenAIResponseInputToolFunction", + "OpenAIResponseInputToolMCP", + "OpenAIResponseInputToolWebSearch", + "OpenAIResponseMCPApprovalRequest", + "OpenAIResponseMCPApprovalResponse", + "OpenAIResponseMessage", + "OpenAIResponseObject", + "OpenAIResponseObjectStream", + "OpenAIResponseObjectStreamResponseCompleted", + "OpenAIResponseObjectStreamResponseContentPartAdded", + "OpenAIResponseObjectStreamResponseContentPartDone", + "OpenAIResponseObjectStreamResponseCreated", + 
"OpenAIResponseObjectStreamResponseFailed", + "OpenAIResponseObjectStreamResponseFileSearchCallCompleted", + "OpenAIResponseObjectStreamResponseFileSearchCallInProgress", + "OpenAIResponseObjectStreamResponseFileSearchCallSearching", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", + "OpenAIResponseObjectStreamResponseInProgress", + "OpenAIResponseObjectStreamResponseIncomplete", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", + "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", + "OpenAIResponseObjectStreamResponseMcpCallCompleted", + "OpenAIResponseObjectStreamResponseMcpCallFailed", + "OpenAIResponseObjectStreamResponseMcpCallInProgress", + "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", + "OpenAIResponseObjectStreamResponseMcpListToolsFailed", + "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", + "OpenAIResponseObjectStreamResponseOutputItemAdded", + "OpenAIResponseObjectStreamResponseOutputItemDone", + "OpenAIResponseObjectStreamResponseOutputTextAnnotationAdded", + "OpenAIResponseObjectStreamResponseOutputTextDelta", + "OpenAIResponseObjectStreamResponseOutputTextDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartAdded", + "OpenAIResponseObjectStreamResponseReasoningSummaryPartDone", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDelta", + "OpenAIResponseObjectStreamResponseReasoningSummaryTextDone", + "OpenAIResponseObjectStreamResponseReasoningTextDelta", + "OpenAIResponseObjectStreamResponseReasoningTextDone", + "OpenAIResponseObjectStreamResponseRefusalDelta", + "OpenAIResponseObjectStreamResponseRefusalDone", + "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", + "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", + "OpenAIResponseObjectStreamResponseWebSearchCallSearching", + "OpenAIResponseObjectWithInput", + "OpenAIResponseOutput", + "OpenAIResponseOutputMessageContent", + "OpenAIResponseOutputMessageContentOutputText", + "OpenAIResponseOutputMessageFileSearchToolCall", + "OpenAIResponseOutputMessageFileSearchToolCallResults", + "OpenAIResponseOutputMessageFunctionToolCall", + "OpenAIResponseOutputMessageMCPCall", + "OpenAIResponseOutputMessageMCPListTools", + "OpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponsePrompt", + "OpenAIResponseText", + "OpenAIResponseTextFormat", + "OpenAIResponseTool", + "OpenAIResponseToolMCP", + "OpenAIResponseUsage", + "OpenAIResponseUsageInputTokensDetails", + "OpenAIResponseUsageOutputTokensDetails", + "OpenAISystemMessageParam", + "OpenAITokenLogProb", + "OpenAIToolMessageParam", + "OpenAITopLogProb", + "OpenAIUserMessageParam", + "OptimizerConfig", + "OptimizerType", + "PaginatedResponse", + "ParamType", + "parse_type", + "PostTraining", + "PostTrainingMetric", + "PostTrainingJob", + "PostTrainingJobArtifactsResponse", + "PostTrainingJobLogStream", + "PostTrainingJobStatusResponse", + "PostTrainingRLHFRequest", + "Prompt", + "Prompts", + "ProviderInfo", + "ProviderSpec", + "Providers", + "python_type_to_name", + "QATFinetuningConfig", + "QuantizationConfig", + "QuantizationType", + "QueryChunksResponse", + "RAGDocument", + "RAGQueryConfig", + "RAGQueryGenerator", + "RAGQueryGeneratorConfig", + "RAGQueryResult", + "RAGSearchMode", + "register_schema", + "RLHFAlgorithm", + "RRFRanker", + "Ranker", + "RegexParserScoringFnParams", + "RemoteProviderConfig", + "RemoteProviderSpec", + "RerankData", + "RerankResponse", + "Resource", + "ResourceNotFoundError", + 
"ResourceType", + "ResponseFormat", + "ResponseFormatType", + "ResponseGuardrail", + "ResponseGuardrailSpec", + "RouteInfo", + "RoutingTable", + "RowsDataSource", + "RunShieldResponse", + "Safety", + "SafetyViolation", + "SamplingParams", + "SamplingStrategy", + "ScoreBatchResponse", + "ScoreResponse", + "Scoring", + "ScoringFn", + "ScoringFnInput", + "ScoringFnParams", + "ScoringFnParamsType", + "ScoringFunctionStore", + "ScoringFunctions", + "ScoringFunctionsProtocolPrivate", + "ScoringResult", + "ScoringResultRow", + "Schema", + "SchemaOptions", + "SearchRankingOptions", + "Shield", + "ShieldInput", + "ShieldStore", + "Shields", + "ShieldsProtocolPrivate", + "SpecialToolGroup", + "StrictJsonType", + "StringType", + "SystemMessage", + "SystemMessageBehavior", + "TextContentItem", + "TextTruncation", + "TokenLogProbs", + "TokenValidationError", + "ToolChoice", + "ToolGroupNotFoundError", + "ToolDef", + "ToolGroup", + "ToolGroupInput", + "ToolGroups", + "ToolGroupsProtocolPrivate", + "ToolInvocationResult", + "ToolResponseMessage", + "ToolRuntime", + "ToolStore", + "TopKSamplingStrategy", + "TopPSamplingStrategy", + "TrainingConfig", + "UnsupportedModelError", + "unwrap_generic_list", + "unwrap_optional_type", + "unwrap_union_types", + "URIDataSource", + "URL", + "_URLOrData", + "UserMessage", + "VectorIO", + "VectorStore", + "VectorStoreChunkingStrategy", + "VectorStoreChunkingStrategyAuto", + "VectorStoreChunkingStrategyStatic", + "VectorStoreChunkingStrategyStaticConfig", + "VectorStoreContent", + "VectorStoreCreateRequest", + "VectorStoreDeleteResponse", + "VectorStoreFileBatchObject", + "VectorStoreFileContentResponse", + "VectorStoreFileCounts", + "VectorStoreFileDeleteResponse", + "VectorStoreFileLastError", + "VectorStoreFileObject", + "VectorStoreFileStatus", + "VectorStoreFilesListInBatchResponse", + "VectorStoreInput", + "VectorStoreListFilesResponse", + "VectorStoreListResponse", + "VectorStoreModifyRequest", + "VectorStoreObject", + "VectorStoreSearchRequest", + "VectorStoreSearchResponse", + "VectorStoreSearchResponsePage", + "VectorStoreTable", + "VectorStoreNotFoundError", + "VectorStoresProtocolPrivate", + "VersionInfo", + "ViolationLevel", + "webmethod", + "WebMethod", + "WebSearchToolTypes", + "WeightedRanker", +] diff --git a/src/llama_stack/apis/agents/agents.py b/src/llama-stack-api/llama_stack_api/agents.py similarity index 96% rename from src/llama_stack/apis/agents/agents.py rename to src/llama-stack-api/llama_stack_api/agents.py index 09687ef33..ca0611746 100644 --- a/src/llama_stack/apis/agents/agents.py +++ b/src/llama-stack-api/llama_stack_api/agents.py @@ -9,9 +9,9 @@ from typing import Annotated, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.responses import Order -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.common.responses import Order +from llama_stack_api.schema_utils import ExtraBodyField, json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 from .openai_responses import ( ListOpenAIResponseInputItem, diff --git a/src/llama_stack/apis/batches/batches.py b/src/llama-stack-api/llama_stack_api/batches.py similarity index 96% rename from src/llama_stack/apis/batches/batches.py rename to src/llama-stack-api/llama_stack_api/batches.py index 1ee9fdb15..00c47d39f 100644 --- a/src/llama_stack/apis/batches/batches.py +++ b/src/llama-stack-api/llama_stack_api/batches.py @@ -8,8 
+8,8 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 try: from openai.types import Batch as BatchObject diff --git a/src/llama_stack/apis/benchmarks/benchmarks.py b/src/llama-stack-api/llama_stack_api/benchmarks.py similarity index 94% rename from src/llama_stack/apis/benchmarks/benchmarks.py rename to src/llama-stack-api/llama_stack_api/benchmarks.py index 9a67269c3..e9ac3a8b8 100644 --- a/src/llama_stack/apis/benchmarks/benchmarks.py +++ b/src/llama-stack-api/llama_stack_api/benchmarks.py @@ -7,9 +7,9 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA class CommonBenchmarkFields(BaseModel): diff --git a/src/llama_stack/apis/__init__.py b/src/llama-stack-api/llama_stack_api/common/__init__.py similarity index 100% rename from src/llama_stack/apis/__init__.py rename to src/llama-stack-api/llama_stack_api/common/__init__.py diff --git a/src/llama_stack/apis/common/content_types.py b/src/llama-stack-api/llama_stack_api/common/content_types.py similarity index 65% rename from src/llama_stack/apis/common/content_types.py rename to src/llama-stack-api/llama_stack_api/common/content_types.py index 950dd17ff..1bfe109c1 100644 --- a/src/llama_stack/apis/common/content_types.py +++ b/src/llama-stack-api/llama_stack_api/common/content_types.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import Enum from typing import Annotated, Literal from pydantic import BaseModel, Field, model_validator -from llama_stack.models.llama.datatypes import ToolCall -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type @@ -101,43 +99,3 @@ class ImageDelta(BaseModel): type: Literal["image"] = "image" image: bytes - - -class ToolCallParseStatus(Enum): - """Status of tool call parsing during streaming. - :cvar started: Tool call parsing has begun - :cvar in_progress: Tool call parsing is ongoing - :cvar failed: Tool call parsing failed - :cvar succeeded: Tool call parsing completed successfully - """ - - started = "started" - in_progress = "in_progress" - failed = "failed" - succeeded = "succeeded" - - -@json_schema_type -class ToolCallDelta(BaseModel): - """A tool call content delta for streaming responses. - - :param type: Discriminator type of the delta. 
Always "tool_call" - :param tool_call: Either an in-progress tool call string or the final parsed tool call - :param parse_status: Current parsing status of the tool call - """ - - type: Literal["tool_call"] = "tool_call" - - # you either send an in-progress tool call so the client can stream a long - # code generation or you send the final parsed tool call at the end of the - # stream - tool_call: str | ToolCall - parse_status: ToolCallParseStatus - - -# streaming completions send a stream of ContentDeltas -ContentDelta = Annotated[ - TextDelta | ImageDelta | ToolCallDelta, - Field(discriminator="type"), -] -register_schema(ContentDelta, name="ContentDelta") diff --git a/src/llama_stack/apis/common/errors.py b/src/llama-stack-api/llama_stack_api/common/errors.py similarity index 100% rename from src/llama_stack/apis/common/errors.py rename to src/llama-stack-api/llama_stack_api/common/errors.py diff --git a/src/llama_stack/apis/common/job_types.py b/src/llama-stack-api/llama_stack_api/common/job_types.py similarity index 94% rename from src/llama_stack/apis/common/job_types.py rename to src/llama-stack-api/llama_stack_api/common/job_types.py index 5da42bfd3..b6ef35d7f 100644 --- a/src/llama_stack/apis/common/job_types.py +++ b/src/llama-stack-api/llama_stack_api/common/job_types.py @@ -7,7 +7,7 @@ from enum import Enum from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class JobStatus(Enum): diff --git a/src/llama_stack/apis/common/responses.py b/src/llama-stack-api/llama_stack_api/common/responses.py similarity index 97% rename from src/llama_stack/apis/common/responses.py rename to src/llama-stack-api/llama_stack_api/common/responses.py index 53a290eea..c843ce1d9 100644 --- a/src/llama_stack/apis/common/responses.py +++ b/src/llama-stack-api/llama_stack_api/common/responses.py @@ -9,7 +9,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type class Order(Enum): diff --git a/src/llama_stack/apis/common/tracing.py b/src/llama-stack-api/llama_stack_api/common/tracing.py similarity index 100% rename from src/llama_stack/apis/common/tracing.py rename to src/llama-stack-api/llama_stack_api/common/tracing.py diff --git a/src/llama_stack/apis/common/training_types.py b/src/llama-stack-api/llama_stack_api/common/training_types.py similarity index 96% rename from src/llama_stack/apis/common/training_types.py rename to src/llama-stack-api/llama_stack_api/common/training_types.py index 5c236a25d..aa3481770 100644 --- a/src/llama_stack/apis/common/training_types.py +++ b/src/llama-stack-api/llama_stack_api/common/training_types.py @@ -8,7 +8,7 @@ from datetime import datetime from pydantic import BaseModel -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/apis/common/type_system.py b/src/llama-stack-api/llama_stack_api/common/type_system.py similarity index 97% rename from src/llama_stack/apis/common/type_system.py rename to src/llama-stack-api/llama_stack_api/common/type_system.py index c71501548..8297713cf 100644 --- a/src/llama_stack/apis/common/type_system.py +++ b/src/llama-stack-api/llama_stack_api/common/type_system.py @@ -8,7 +8,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type, 
register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema @json_schema_type diff --git a/src/llama_stack/apis/conversations/conversations.py b/src/llama-stack-api/llama_stack_api/conversations.py similarity index 97% rename from src/llama_stack/apis/conversations/conversations.py rename to src/llama-stack-api/llama_stack_api/conversations.py index 3fdd3b47e..4854181d1 100644 --- a/src/llama_stack/apis/conversations/conversations.py +++ b/src/llama-stack-api/llama_stack_api/conversations.py @@ -9,7 +9,8 @@ from typing import Annotated, Literal, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.openai_responses import ( OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseMCPApprovalRequest, OpenAIResponseMCPApprovalResponse, @@ -20,9 +21,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageWebSearchToolCall, ) -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 Metadata = dict[str, str] diff --git a/src/llama_stack/apis/datasetio/datasetio.py b/src/llama-stack-api/llama_stack_api/datasetio.py similarity index 89% rename from src/llama_stack/apis/datasetio/datasetio.py rename to src/llama-stack-api/llama_stack_api/datasetio.py index a0c4a1afc..309a8ff41 100644 --- a/src/llama_stack/apis/datasetio/datasetio.py +++ b/src/llama-stack-api/llama_stack_api/datasetio.py @@ -6,10 +6,10 @@ from typing import Any, Protocol, runtime_checkable -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.version import LLAMA_STACK_API_V1BETA -from llama_stack.schema_utils import webmethod +from llama_stack_api.common.responses import PaginatedResponse +from llama_stack_api.datasets import Dataset +from llama_stack_api.schema_utils import webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetStore(Protocol): diff --git a/src/llama_stack/apis/datasets/datasets.py b/src/llama-stack-api/llama_stack_api/datasets.py similarity index 97% rename from src/llama_stack/apis/datasets/datasets.py rename to src/llama-stack-api/llama_stack_api/datasets.py index 9bedc6209..76d787078 100644 --- a/src/llama_stack/apis/datasets/datasets.py +++ b/src/llama-stack-api/llama_stack_api/datasets.py @@ -9,9 +9,9 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1BETA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1BETA class DatasetPurpose(StrEnum): diff --git a/src/llama_stack/providers/datatypes.py b/src/llama-stack-api/llama_stack_api/datatypes.py similarity index 51% rename from src/llama_stack/providers/datatypes.py rename to src/llama-stack-api/llama_stack_api/datatypes.py 
index 9be3edb8e..f024068f3 100644 --- a/src/llama_stack/providers/datatypes.py +++ b/src/llama-stack-api/llama_stack_api/datatypes.py @@ -4,21 +4,172 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from enum import StrEnum +from enum import Enum, EnumMeta, StrEnum from typing import Any, Protocol from urllib.parse import urlparse from pydantic import BaseModel, Field -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasets import Dataset -from llama_stack.apis.datatypes import Api -from llama_stack.apis.models import Model -from llama_stack.apis.scoring_functions import ScoringFn -from llama_stack.apis.shields import Shield -from llama_stack.apis.tools import ToolGroup -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.schema_utils import json_schema_type +from llama_stack_api.benchmarks import Benchmark +from llama_stack_api.datasets import Dataset +from llama_stack_api.models import Model +from llama_stack_api.schema_utils import json_schema_type +from llama_stack_api.scoring_functions import ScoringFn +from llama_stack_api.shields import Shield +from llama_stack_api.tools import ToolGroup +from llama_stack_api.vector_stores import VectorStore + + +class DynamicApiMeta(EnumMeta): + def __new__(cls, name, bases, namespace): + # Store the original enum values + original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} + + # Create the enum class + cls = super().__new__(cls, name, bases, namespace) + + # Store the original values for reference + cls._original_values = original_values + # Initialize _dynamic_values + cls._dynamic_values = {} + + return cls + + def __call__(cls, value): + try: + return super().__call__(value) + except ValueError as e: + # If this value was already dynamically added, return it + if value in cls._dynamic_values: + return cls._dynamic_values[value] + + # If the value doesn't exist, create a new enum member + # Create a new member name from the value + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return the existing member + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Instead of creating a new member, raise ValueError to force users to use Api.add() to + # register new APIs explicitly + raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e + + def __iter__(cls): + # Allow iteration over both static and dynamic members + yield from super().__iter__() + if hasattr(cls, "_dynamic_values"): + yield from cls._dynamic_values.values() + + def add(cls, value): + """ + Add a new API to the enum. + Used to register external APIs. + """ + member_name = value.lower().replace("-", "_") + + # If this member name already exists in the enum, return it + if member_name in cls._member_map_: + return cls._member_map_[member_name] + + # Create a new enum member + member = object.__new__(cls) + member._name_ = member_name + member._value_ = value + + # Add it to the enum class + cls._member_map_[member_name] = member + cls._member_names_.append(member_name) + cls._member_type_ = str + + # Store it in our dynamic values + cls._dynamic_values[value] = member + + return member + + +@json_schema_type +class Api(Enum, metaclass=DynamicApiMeta): + """Enumeration of all available APIs in the Llama Stack system. 
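+
+    External APIs can be registered at runtime with Api.add() (implemented by
+    DynamicApiMeta above); a minimal sketch::
+
+        weather = Api.add("weather")  # adds a dynamic member, Api.weather
+        assert Api("weather") is weather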
+ :cvar providers: Provider management and configuration + :cvar inference: Text generation, chat completions, and embeddings + :cvar safety: Content moderation and safety shields + :cvar agents: Agent orchestration and execution + :cvar batches: Batch processing for asynchronous API requests + :cvar vector_io: Vector database operations and queries + :cvar datasetio: Dataset input/output operations + :cvar scoring: Model output evaluation and scoring + :cvar eval: Model evaluation and benchmarking framework + :cvar post_training: Fine-tuning and model training + :cvar tool_runtime: Tool execution and management + :cvar models: Model metadata and management + :cvar shields: Safety shield implementations + :cvar vector_stores: Vector store records used by the routing table + :cvar datasets: Dataset creation and management + :cvar scoring_functions: Scoring function definitions + :cvar benchmarks: Benchmark suite management + :cvar tool_groups: Tool group organization + :cvar files: File storage and management + :cvar prompts: Prompt versioning and management + :cvar conversations: Conversation creation and management + :cvar inspect: Built-in system inspection and introspection + """ + + providers = "providers" + inference = "inference" + safety = "safety" + agents = "agents" + batches = "batches" + vector_io = "vector_io" + datasetio = "datasetio" + scoring = "scoring" + eval = "eval" + post_training = "post_training" + tool_runtime = "tool_runtime" + + models = "models" + shields = "shields" + vector_stores = "vector_stores" # only used for routing table + datasets = "datasets" + scoring_functions = "scoring_functions" + benchmarks = "benchmarks" + tool_groups = "tool_groups" + files = "files" + prompts = "prompts" + conversations = "conversations" + + # built-in API + inspect = "inspect" + + +@json_schema_type +class Error(BaseModel): + """ + Error response from the API. Roughly follows RFC 7807.
+ + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: str | None = None + + +class ExternalApiSpec(BaseModel): + """Specification for an external API implementation.""" + + module: str = Field(..., description="Python module containing the API implementation") + name: str = Field(..., description="Name of the API") + pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") + protocol: str = Field(..., description="Name of the protocol class for the API") + + +# Provider-related types (merged from providers/datatypes.py) +# NOTE: These imports are forward references to avoid circular dependencies +# They will be resolved at runtime when the classes are used class ModelsProtocolPrivate(Protocol): diff --git a/src/llama_stack/apis/eval/eval.py b/src/llama-stack-api/llama_stack_api/eval.py similarity index 92% rename from src/llama_stack/apis/eval/eval.py rename to src/llama-stack-api/llama_stack_api/eval.py index accb04ce1..7a11c221e 100644 --- a/src/llama_stack/apis/eval/eval.py +++ b/src/llama-stack-api/llama_stack_api/eval.py @@ -8,12 +8,12 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.common.job_types import Job -from llama_stack.apis.inference import SamplingParams, SystemMessage -from llama_stack.apis.scoring import ScoringResult -from llama_stack.apis.scoring_functions import ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.job_types import Job +from llama_stack_api.inference import SamplingParams, SystemMessage +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring import ScoringResult +from llama_stack_api.scoring_functions import ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/files/files.py b/src/llama-stack-api/llama_stack_api/files.py similarity index 96% rename from src/llama_stack/apis/files/files.py rename to src/llama-stack-api/llama_stack_api/files.py index f0ea2f892..8a75a1c39 100644 --- a/src/llama_stack/apis/files/files.py +++ b/src/llama-stack-api/llama_stack_api/files.py @@ -10,10 +10,10 @@ from typing import Annotated, ClassVar, Literal, Protocol, runtime_checkable from fastapi import File, Form, Response, UploadFile from pydantic import BaseModel, Field -from llama_stack.apis.common.responses import Order -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.responses import Order +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # OpenAI Files API Models diff --git a/src/llama_stack/apis/inference/inference.py b/src/llama-stack-api/llama_stack_api/inference.py similarity index 99% rename from src/llama_stack/apis/inference/inference.py rename to 
src/llama-stack-api/llama_stack_api/inference.py index 9f04917c9..b42de95be 100644 --- a/src/llama_stack/apis/inference/inference.py +++ b/src/llama-stack-api/llama_stack_api/inference.py @@ -18,14 +18,14 @@ from fastapi import Body from pydantic import BaseModel, Field from typing_extensions import TypedDict -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.common.responses import ( +from llama_stack_api.common.content_types import InterleavedContent +from llama_stack_api.common.responses import ( Order, ) -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.models import Model -from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.models import Model +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/inspect/inspect.py b/src/llama-stack-api/llama_stack_api/inspect.py similarity index 94% rename from src/llama_stack/apis/inspect/inspect.py rename to src/llama-stack-api/llama_stack_api/inspect.py index 235abb124..8326e9e6b 100644 --- a/src/llama_stack/apis/inspect/inspect.py +++ b/src/llama-stack-api/llama_stack_api/inspect.py @@ -8,11 +8,11 @@ from typing import Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.version import ( +from llama_stack_api.datatypes import HealthStatus +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import ( LLAMA_STACK_API_V1, ) -from llama_stack.providers.datatypes import HealthStatus -from llama_stack.schema_utils import json_schema_type, webmethod # Valid values for the route filter parameter. 
# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated) diff --git a/src/llama_stack/apis/models/models.py b/src/llama-stack-api/llama_stack_api/models.py similarity index 95% rename from src/llama_stack/apis/models/models.py rename to src/llama-stack-api/llama_stack_api/models.py index bbb359b51..833864ec2 100644 --- a/src/llama_stack/apis/models/models.py +++ b/src/llama-stack-api/llama_stack_api/models.py @@ -9,10 +9,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel, ConfigDict, Field, field_validator -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonModelFields(BaseModel): diff --git a/src/llama_stack/apis/agents/openai_responses.py b/src/llama-stack-api/llama_stack_api/openai_responses.py similarity index 99% rename from src/llama_stack/apis/agents/openai_responses.py rename to src/llama-stack-api/llama_stack_api/openai_responses.py index 8f9655cae..2dd73e90a 100644 --- a/src/llama_stack/apis/agents/openai_responses.py +++ b/src/llama-stack-api/llama_stack_api/openai_responses.py @@ -10,8 +10,8 @@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, model_validator from typing_extensions import TypedDict -from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions -from llama_stack.schema_utils import json_schema_type, register_schema +from llama_stack_api.schema_utils import json_schema_type, register_schema +from llama_stack_api.vector_io import SearchRankingOptions as FileSearchRankingOptions # NOTE(ashwin): this file is literally a copy of the OpenAI responses API schema. We should probably # take their YAML and generate this file automatically. Their YAML is available. 
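A minimal usage sketch of the dynamic `Api` registration and `ExternalApiSpec` defined in the new `datatypes.py` above. The `geocoding` API and its module/package names are hypothetical; the sketch assumes the new `llama_stack_api` package is installed.

```python
from llama_stack_api import Api, ExternalApiSpec

# Unregistered API names are rejected with:
# ValueError: API 'geocoding' does not exist. Use Api.add() to register new APIs.
try:
    Api("geocoding")
except ValueError:
    pass

# Api.add() creates the member; repeated calls return the existing member.
geocoding = Api.add("geocoding")
assert Api("geocoding") is geocoding  # dynamic lookup by value now succeeds
assert Api.add("geocoding") is geocoding  # idempotent

# An external API implementation is described to the stack with an ExternalApiSpec.
spec = ExternalApiSpec(
    module="llama_stack_api_geocoding",  # hypothetical external package
    name="geocoding",
    pip_packages=["llama-stack-api-geocoding"],
    protocol="GeocodingAPI",
)
```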
diff --git a/src/llama_stack/apis/post_training/post_training.py b/src/llama-stack-api/llama_stack_api/post_training.py similarity index 97% rename from src/llama_stack/apis/post_training/post_training.py rename to src/llama-stack-api/llama_stack_api/post_training.py index 2b7a6222f..0cc9277d9 100644 --- a/src/llama_stack/apis/post_training/post_training.py +++ b/src/llama-stack-api/llama_stack_api/post_training.py @@ -10,11 +10,11 @@ from typing import Annotated, Any, Literal, Protocol from pydantic import BaseModel, Field -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.job_types import JobStatus -from llama_stack.apis.common.training_types import Checkpoint -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.content_types import URL +from llama_stack_api.common.job_types import JobStatus +from llama_stack_api.common.training_types import Checkpoint +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1ALPHA @json_schema_type diff --git a/src/llama_stack/apis/prompts/prompts.py b/src/llama-stack-api/llama_stack_api/prompts.py similarity index 97% rename from src/llama_stack/apis/prompts/prompts.py rename to src/llama-stack-api/llama_stack_api/prompts.py index 406ae529c..651d03e61 100644 --- a/src/llama_stack/apis/prompts/prompts.py +++ b/src/llama-stack-api/llama_stack_api/prompts.py @@ -10,9 +10,9 @@ from typing import Protocol, runtime_checkable from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/providers/providers.py b/src/llama-stack-api/llama_stack_api/providers.py similarity index 91% rename from src/llama_stack/apis/providers/providers.py rename to src/llama-stack-api/llama_stack_api/providers.py index e1872571d..5b555b82f 100644 --- a/src/llama_stack/apis/providers/providers.py +++ b/src/llama-stack-api/llama_stack_api/providers.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import HealthResponse -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.datatypes import HealthResponse +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/py.typed similarity index 100% rename from src/llama_stack/strong_typing/py.typed rename to src/llama-stack-api/llama_stack_api/py.typed diff --git a/src/llama_stack/apis/tools/rag_tool.py b/src/llama-stack-api/llama_stack_api/rag_tool.py similarity index 98% rename from src/llama_stack/apis/tools/rag_tool.py rename to src/llama-stack-api/llama_stack_api/rag_tool.py index 8bcc89bf0..b5edd51af 100644 --- a/src/llama_stack/apis/tools/rag_tool.py +++ b/src/llama-stack-api/llama_stack_api/rag_tool.py @@ -9,7 +9,7 
@@ from typing import Annotated, Any, Literal from pydantic import BaseModel, Field, field_validator -from llama_stack.apis.common.content_types import URL, InterleavedContent +from llama_stack_api.common.content_types import URL, InterleavedContent class RRFRanker(BaseModel): diff --git a/src/llama_stack/apis/resource.py b/src/llama-stack-api/llama_stack_api/resource.py similarity index 100% rename from src/llama_stack/apis/resource.py rename to src/llama-stack-api/llama_stack_api/resource.py diff --git a/src/llama_stack/apis/safety/safety.py b/src/llama-stack-api/llama_stack_api/safety.py similarity index 93% rename from src/llama_stack/apis/safety/safety.py rename to src/llama-stack-api/llama_stack_api/safety.py index 8872cc518..ef84be2ea 100644 --- a/src/llama_stack/apis/safety/safety.py +++ b/src/llama-stack-api/llama_stack_api/safety.py @@ -9,11 +9,11 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel, Field -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.shields import Shield -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import OpenAIMessageParam +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.shields import Shield +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/schema_utils.py b/src/llama-stack-api/llama_stack_api/schema_utils.py similarity index 100% rename from src/llama_stack/schema_utils.py rename to src/llama-stack-api/llama_stack_api/schema_utils.py diff --git a/src/llama_stack/apis/scoring/scoring.py b/src/llama-stack-api/llama_stack_api/scoring.py similarity index 93% rename from src/llama_stack/apis/scoring/scoring.py rename to src/llama-stack-api/llama_stack_api/scoring.py index 03d943e94..47d144d21 100644 --- a/src/llama_stack/apis/scoring/scoring.py +++ b/src/llama-stack-api/llama_stack_api/scoring.py @@ -8,9 +8,9 @@ from typing import Any, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.scoring_functions import ScoringFn, ScoringFnParams +from llama_stack_api.version import LLAMA_STACK_API_V1 # mapping of metric to value ScoringResultRow = dict[str, Any] diff --git a/src/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama-stack-api/llama_stack_api/scoring_functions.py similarity index 96% rename from src/llama_stack/apis/scoring_functions/scoring_functions.py rename to src/llama-stack-api/llama_stack_api/scoring_functions.py index 78f4a7541..f75336e54 100644 --- a/src/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/src/llama-stack-api/llama_stack_api/scoring_functions.py @@ -16,10 +16,10 @@ from typing import ( from pydantic import BaseModel, Field -from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.common.type_system 
import ParamType +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, register_schema, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 # Perhaps more structure can be imposed on these functions. Maybe they could be associated diff --git a/src/llama_stack/apis/shields/shields.py b/src/llama-stack-api/llama_stack_api/shields.py similarity index 91% rename from src/llama_stack/apis/shields/shields.py rename to src/llama-stack-api/llama_stack_api/shields.py index 659ba8b75..2aeb83333 100644 --- a/src/llama_stack/apis/shields/shields.py +++ b/src/llama-stack-api/llama_stack_api/shields.py @@ -8,10 +8,10 @@ from typing import Any, Literal, Protocol, runtime_checkable from pydantic import BaseModel -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 class CommonShieldFields(BaseModel): diff --git a/src/llama_stack/strong_typing/__init__.py b/src/llama-stack-api/llama_stack_api/strong_typing/__init__.py similarity index 100% rename from src/llama_stack/strong_typing/__init__.py rename to src/llama-stack-api/llama_stack_api/strong_typing/__init__.py diff --git a/src/llama_stack/strong_typing/auxiliary.py b/src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py similarity index 100% rename from src/llama_stack/strong_typing/auxiliary.py rename to src/llama-stack-api/llama_stack_api/strong_typing/auxiliary.py diff --git a/src/llama_stack/strong_typing/classdef.py b/src/llama-stack-api/llama_stack_api/strong_typing/classdef.py similarity index 100% rename from src/llama_stack/strong_typing/classdef.py rename to src/llama-stack-api/llama_stack_api/strong_typing/classdef.py diff --git a/src/llama_stack/strong_typing/core.py b/src/llama-stack-api/llama_stack_api/strong_typing/core.py similarity index 100% rename from src/llama_stack/strong_typing/core.py rename to src/llama-stack-api/llama_stack_api/strong_typing/core.py diff --git a/src/llama_stack/strong_typing/deserializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py similarity index 100% rename from src/llama_stack/strong_typing/deserializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/deserializer.py diff --git a/src/llama_stack/strong_typing/docstring.py b/src/llama-stack-api/llama_stack_api/strong_typing/docstring.py similarity index 100% rename from src/llama_stack/strong_typing/docstring.py rename to src/llama-stack-api/llama_stack_api/strong_typing/docstring.py diff --git a/src/llama_stack/strong_typing/exception.py b/src/llama-stack-api/llama_stack_api/strong_typing/exception.py similarity index 100% rename from src/llama_stack/strong_typing/exception.py rename to src/llama-stack-api/llama_stack_api/strong_typing/exception.py diff --git a/src/llama_stack/strong_typing/inspection.py b/src/llama-stack-api/llama_stack_api/strong_typing/inspection.py similarity index 100% rename from src/llama_stack/strong_typing/inspection.py rename to src/llama-stack-api/llama_stack_api/strong_typing/inspection.py diff --git a/src/llama_stack/strong_typing/mapping.py 
b/src/llama-stack-api/llama_stack_api/strong_typing/mapping.py similarity index 100% rename from src/llama_stack/strong_typing/mapping.py rename to src/llama-stack-api/llama_stack_api/strong_typing/mapping.py diff --git a/src/llama_stack/strong_typing/name.py b/src/llama-stack-api/llama_stack_api/strong_typing/name.py similarity index 100% rename from src/llama_stack/strong_typing/name.py rename to src/llama-stack-api/llama_stack_api/strong_typing/name.py diff --git a/src/llama-stack-api/llama_stack_api/strong_typing/py.typed b/src/llama-stack-api/llama_stack_api/strong_typing/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/src/llama_stack/strong_typing/schema.py b/src/llama-stack-api/llama_stack_api/strong_typing/schema.py similarity index 100% rename from src/llama_stack/strong_typing/schema.py rename to src/llama-stack-api/llama_stack_api/strong_typing/schema.py diff --git a/src/llama_stack/strong_typing/serialization.py b/src/llama-stack-api/llama_stack_api/strong_typing/serialization.py similarity index 100% rename from src/llama_stack/strong_typing/serialization.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serialization.py diff --git a/src/llama_stack/strong_typing/serializer.py b/src/llama-stack-api/llama_stack_api/strong_typing/serializer.py similarity index 100% rename from src/llama_stack/strong_typing/serializer.py rename to src/llama-stack-api/llama_stack_api/strong_typing/serializer.py diff --git a/src/llama_stack/strong_typing/slots.py b/src/llama-stack-api/llama_stack_api/strong_typing/slots.py similarity index 100% rename from src/llama_stack/strong_typing/slots.py rename to src/llama-stack-api/llama_stack_api/strong_typing/slots.py diff --git a/src/llama_stack/strong_typing/topological.py b/src/llama-stack-api/llama_stack_api/strong_typing/topological.py similarity index 100% rename from src/llama_stack/strong_typing/topological.py rename to src/llama-stack-api/llama_stack_api/strong_typing/topological.py diff --git a/src/llama_stack/apis/tools/tools.py b/src/llama-stack-api/llama_stack_api/tools.py similarity index 95% rename from src/llama_stack/apis/tools/tools.py rename to src/llama-stack-api/llama_stack_api/tools.py index 06580dc74..81c989f88 100644 --- a/src/llama_stack/apis/tools/tools.py +++ b/src/llama-stack-api/llama_stack_api/tools.py @@ -10,11 +10,11 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel from typing_extensions import runtime_checkable -from llama_stack.apis.common.content_types import URL, InterleavedContent -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.resource import Resource, ResourceType -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod +from llama_stack_api.common.content_types import URL, InterleavedContent +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.resource import Resource, ResourceType +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama-stack-api/llama_stack_api/vector_io.py similarity index 98% rename from src/llama_stack/apis/vector_io/vector_io.py rename to src/llama-stack-api/llama_stack_api/vector_io.py index 699241128..053e569f4 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama-stack-api/llama_stack_api/vector_io.py @@ -13,12 +13,12 @@ 
from typing import Annotated, Any, Literal, Protocol, runtime_checkable from fastapi import Body, Query from pydantic import BaseModel, Field -from llama_stack.apis.common.tracing import telemetry_traceable -from llama_stack.apis.inference import InterleavedContent -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.schema_utils import json_schema_type, webmethod -from llama_stack.strong_typing.schema import register_schema +from llama_stack_api.common.tracing import telemetry_traceable +from llama_stack_api.inference import InterleavedContent +from llama_stack_api.schema_utils import json_schema_type, webmethod +from llama_stack_api.strong_typing.schema import register_schema +from llama_stack_api.vector_stores import VectorStore +from llama_stack_api.version import LLAMA_STACK_API_V1 @json_schema_type diff --git a/src/llama_stack/apis/vector_stores/vector_stores.py b/src/llama-stack-api/llama_stack_api/vector_stores.py similarity index 96% rename from src/llama_stack/apis/vector_stores/vector_stores.py rename to src/llama-stack-api/llama_stack_api/vector_stores.py index 524624028..0a1e6c53c 100644 --- a/src/llama_stack/apis/vector_stores/vector_stores.py +++ b/src/llama-stack-api/llama_stack_api/vector_stores.py @@ -8,7 +8,7 @@ from typing import Literal from pydantic import BaseModel -from llama_stack.apis.resource import Resource, ResourceType +from llama_stack_api.resource import Resource, ResourceType # Internal resource type for storing the vector store routing and other information diff --git a/src/llama_stack/apis/version.py b/src/llama-stack-api/llama_stack_api/version.py similarity index 100% rename from src/llama_stack/apis/version.py rename to src/llama-stack-api/llama_stack_api/version.py diff --git a/src/llama-stack-api/pyproject.toml b/src/llama-stack-api/pyproject.toml new file mode 100644 index 000000000..a00472d36 --- /dev/null +++ b/src/llama-stack-api/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.uv] +required-version = ">=0.7.0" + +[project] +name = "llama-stack-api" +version = "0.1.0" +authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] +description = "API and Provider specifications for Llama Stack - lightweight package with protocol definitions and provider specs" +readme = "README.md" +requires-python = ">=3.12" +license = { "text" = "MIT" } +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", +] +dependencies = [ + "pydantic>=2.11.9", + "jsonschema", + "opentelemetry-sdk>=1.30.0", + "opentelemetry-exporter-otlp-proto-http>=1.30.0", +] + +[project.urls] +Homepage = "https://github.com/llamastack/llama-stack" + +[tool.setuptools.packages.find] +where = ["."] +include = ["llama_stack_api", "llama_stack_api.*"] + +[tool.setuptools.package-data] +llama_stack_api = ["py.typed"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "UP", # pyupgrade + "B", # flake8-bugbear + "B9", # flake8-bugbear subset + "C", # comprehensions + "E", # pycodestyle + "F", # Pyflakes + "N", # Naming + "W", # Warnings + "DTZ", # datetime rules + "I", # isort 
(imports order) + "RUF001", # Checks for ambiguous Unicode characters in strings + "RUF002", # Checks for ambiguous Unicode characters in docstrings + "RUF003", # Checks for ambiguous Unicode characters in comments + "PLC2401", # Checks for the use of non-ASCII characters in variable names +] +ignore = [ + # The following ignores are desired by the project maintainers. + "E402", # Module level import not at top of file + "E501", # Line too long + "F405", # Maybe undefined or defined from star import + "C408", # Ignored because we like the dict keyword argument syntax + "N812", # Ignored because import torch.nn.functional as F is PyTorch convention + + # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. + "C901", # Complexity of the function is too high +] +unfixable = [ + "PLE2515", +] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually + +[tool.ruff.lint.per-file-ignores] +"llama_stack_api/apis/**/__init__.py" = ["F403"] + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = ["classmethod", "pydantic.field_validator"] diff --git a/src/llama_stack/apis/agents/__init__.py b/src/llama_stack/apis/agents/__init__.py deleted file mode 100644 index 6416b283b..000000000 --- a/src/llama_stack/apis/agents/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .agents import * diff --git a/src/llama_stack/apis/batches/__init__.py b/src/llama_stack/apis/batches/__init__.py deleted file mode 100644 index 9ce7d3d75..000000000 --- a/src/llama_stack/apis/batches/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .batches import Batches, BatchObject, ListBatchesResponse - -__all__ = ["Batches", "BatchObject", "ListBatchesResponse"] diff --git a/src/llama_stack/apis/benchmarks/__init__.py b/src/llama_stack/apis/benchmarks/__init__.py deleted file mode 100644 index 62d1b367c..000000000 --- a/src/llama_stack/apis/benchmarks/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .benchmarks import * diff --git a/src/llama_stack/apis/common/__init__.py b/src/llama_stack/apis/common/__init__.py deleted file mode 100644 index 756f351d8..000000000 --- a/src/llama_stack/apis/common/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack/apis/conversations/__init__.py b/src/llama_stack/apis/conversations/__init__.py deleted file mode 100644 index b6ddc5999..000000000 --- a/src/llama_stack/apis/conversations/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .conversations import ( - Conversation, - ConversationDeletedResource, - ConversationItem, - ConversationItemCreateRequest, - ConversationItemDeletedResource, - ConversationItemList, - Conversations, - Metadata, -) - -__all__ = [ - "Conversation", - "ConversationDeletedResource", - "ConversationItem", - "ConversationItemCreateRequest", - "ConversationItemDeletedResource", - "ConversationItemList", - "Conversations", - "Metadata", -] diff --git a/src/llama_stack/apis/datasetio/__init__.py b/src/llama_stack/apis/datasetio/__init__.py deleted file mode 100644 index 8c087bfa4..000000000 --- a/src/llama_stack/apis/datasetio/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasetio import * diff --git a/src/llama_stack/apis/datasets/__init__.py b/src/llama_stack/apis/datasets/__init__.py deleted file mode 100644 index 9c9a128d2..000000000 --- a/src/llama_stack/apis/datasets/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasets import * diff --git a/src/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py deleted file mode 100644 index ae01c5dfc..000000000 --- a/src/llama_stack/apis/datatypes.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from enum import Enum, EnumMeta - -from pydantic import BaseModel, Field - -from llama_stack.schema_utils import json_schema_type - - -class DynamicApiMeta(EnumMeta): - def __new__(cls, name, bases, namespace): - # Store the original enum values - original_values = {k: v for k, v in namespace.items() if not k.startswith("_")} - - # Create the enum class - cls = super().__new__(cls, name, bases, namespace) - - # Store the original values for reference - cls._original_values = original_values - # Initialize _dynamic_values - cls._dynamic_values = {} - - return cls - - def __call__(cls, value): - try: - return super().__call__(value) - except ValueError as e: - # If this value was already dynamically added, return it - if value in cls._dynamic_values: - return cls._dynamic_values[value] - - # If the value doesn't exist, create a new enum member - # Create a new member name from the value - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return the existing member - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Instead of creating a new member, raise ValueError to force users to use Api.add() to - # register new APIs explicitly - raise ValueError(f"API '{value}' does not exist. Use Api.add() to register new APIs.") from e - - def __iter__(cls): - # Allow iteration over both static and dynamic members - yield from super().__iter__() - if hasattr(cls, "_dynamic_values"): - yield from cls._dynamic_values.values() - - def add(cls, value): - """ - Add a new API to the enum. - Used to register external APIs. 
- """ - member_name = value.lower().replace("-", "_") - - # If this member name already exists in the enum, return it - if member_name in cls._member_map_: - return cls._member_map_[member_name] - - # Create a new enum member - member = object.__new__(cls) - member._name_ = member_name - member._value_ = value - - # Add it to the enum class - cls._member_map_[member_name] = member - cls._member_names_.append(member_name) - cls._member_type_ = str - - # Store it in our dynamic values - cls._dynamic_values[value] = member - - return member - - -@json_schema_type -class Api(Enum, metaclass=DynamicApiMeta): - """Enumeration of all available APIs in the Llama Stack system. - :cvar providers: Provider management and configuration - :cvar inference: Text generation, chat completions, and embeddings - :cvar safety: Content moderation and safety shields - :cvar agents: Agent orchestration and execution - :cvar batches: Batch processing for asynchronous API requests - :cvar vector_io: Vector database operations and queries - :cvar datasetio: Dataset input/output operations - :cvar scoring: Model output evaluation and scoring - :cvar eval: Model evaluation and benchmarking framework - :cvar post_training: Fine-tuning and model training - :cvar tool_runtime: Tool execution and management - :cvar telemetry: Observability and system monitoring - :cvar models: Model metadata and management - :cvar shields: Safety shield implementations - :cvar datasets: Dataset creation and management - :cvar scoring_functions: Scoring function definitions - :cvar benchmarks: Benchmark suite management - :cvar tool_groups: Tool group organization - :cvar files: File storage and management - :cvar prompts: Prompt versions and management - :cvar inspect: Built-in system inspection and introspection - """ - - providers = "providers" - inference = "inference" - safety = "safety" - agents = "agents" - batches = "batches" - vector_io = "vector_io" - datasetio = "datasetio" - scoring = "scoring" - eval = "eval" - post_training = "post_training" - tool_runtime = "tool_runtime" - - models = "models" - shields = "shields" - vector_stores = "vector_stores" # only used for routing table - datasets = "datasets" - scoring_functions = "scoring_functions" - benchmarks = "benchmarks" - tool_groups = "tool_groups" - files = "files" - prompts = "prompts" - conversations = "conversations" - - # built-in API - inspect = "inspect" - - -@json_schema_type -class Error(BaseModel): - """ - Error response from the API. Roughly follows RFC 7807. 
- - :param status: HTTP status code - :param title: Error title, a short summary of the error which is invariant for an error type - :param detail: Error detail, a longer human-readable description of the error - :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error - """ - - status: int - title: str - detail: str - instance: str | None = None - - -class ExternalApiSpec(BaseModel): - """Specification for an external API implementation.""" - - module: str = Field(..., description="Python module containing the API implementation") - name: str = Field(..., description="Name of the API") - pip_packages: list[str] = Field(default=[], description="List of pip packages to install the API") - protocol: str = Field(..., description="Name of the protocol class for the API") diff --git a/src/llama_stack/apis/eval/__init__.py b/src/llama_stack/apis/eval/__init__.py deleted file mode 100644 index 28a1d6049..000000000 --- a/src/llama_stack/apis/eval/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval import * diff --git a/src/llama_stack/apis/files/__init__.py b/src/llama_stack/apis/files/__init__.py deleted file mode 100644 index 189e4de19..000000000 --- a/src/llama_stack/apis/files/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .files import * diff --git a/src/llama_stack/apis/inference/__init__.py b/src/llama_stack/apis/inference/__init__.py deleted file mode 100644 index f0c8783c1..000000000 --- a/src/llama_stack/apis/inference/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inference import * diff --git a/src/llama_stack/apis/inspect/__init__.py b/src/llama_stack/apis/inspect/__init__.py deleted file mode 100644 index 016937e3d..000000000 --- a/src/llama_stack/apis/inspect/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .inspect import * diff --git a/src/llama_stack/apis/models/__init__.py b/src/llama_stack/apis/models/__init__.py deleted file mode 100644 index ee90106b6..000000000 --- a/src/llama_stack/apis/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import * diff --git a/src/llama_stack/apis/post_training/__init__.py b/src/llama_stack/apis/post_training/__init__.py deleted file mode 100644 index 695575a30..000000000 --- a/src/llama_stack/apis/post_training/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import * diff --git a/src/llama_stack/apis/prompts/__init__.py b/src/llama_stack/apis/prompts/__init__.py deleted file mode 100644 index 6070f3450..000000000 --- a/src/llama_stack/apis/prompts/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .prompts import ListPromptsResponse, Prompt, Prompts - -__all__ = ["Prompt", "Prompts", "ListPromptsResponse"] diff --git a/src/llama_stack/apis/providers/__init__.py b/src/llama_stack/apis/providers/__init__.py deleted file mode 100644 index e35e2fe47..000000000 --- a/src/llama_stack/apis/providers/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .providers import * diff --git a/src/llama_stack/apis/safety/__init__.py b/src/llama_stack/apis/safety/__init__.py deleted file mode 100644 index d93bc1355..000000000 --- a/src/llama_stack/apis/safety/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .safety import * diff --git a/src/llama_stack/apis/scoring/__init__.py b/src/llama_stack/apis/scoring/__init__.py deleted file mode 100644 index 624b9e704..000000000 --- a/src/llama_stack/apis/scoring/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring import * diff --git a/src/llama_stack/apis/scoring_functions/__init__.py b/src/llama_stack/apis/scoring_functions/__init__.py deleted file mode 100644 index fc1de0311..000000000 --- a/src/llama_stack/apis/scoring_functions/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .scoring_functions import * diff --git a/src/llama_stack/apis/shields/__init__.py b/src/llama_stack/apis/shields/__init__.py deleted file mode 100644 index 783a4d124..000000000 --- a/src/llama_stack/apis/shields/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import * diff --git a/src/llama_stack/apis/tools/__init__.py b/src/llama_stack/apis/tools/__init__.py deleted file mode 100644 index b25310ecf..000000000 --- a/src/llama_stack/apis/tools/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .rag_tool import * -from .tools import * diff --git a/src/llama_stack/apis/vector_io/__init__.py b/src/llama_stack/apis/vector_io/__init__.py deleted file mode 100644 index 3f4c60805..000000000 --- a/src/llama_stack/apis/vector_io/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_io import * diff --git a/src/llama_stack/apis/vector_stores/__init__.py b/src/llama_stack/apis/vector_stores/__init__.py deleted file mode 100644 index 8fc34058a..000000000 --- a/src/llama_stack/apis/vector_stores/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_stores import * diff --git a/src/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py index 18141be5f..50fe394fc 100644 --- a/src/llama_stack/cli/stack/_list_deps.py +++ b/src/llama_stack/cli/stack/_list_deps.py @@ -9,6 +9,7 @@ import sys from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.cli.stack.utils import ImageType @@ -21,7 +22,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.distribution import get_provider_registry from llama_stack.core.stack import replace_env_vars from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates" diff --git a/src/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py index cc1ca051b..0a4e22b09 100644 --- a/src/llama_stack/cli/stack/utils.py +++ b/src/llama_stack/cli/stack/utils.py @@ -11,6 +11,7 @@ from functools import lru_cache from pathlib import Path import yaml +from llama_stack_api import Api from termcolor import cprint from llama_stack.core.datatypes import ( @@ -32,7 +33,6 @@ from llama_stack.core.storage.datatypes import ( from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.image_types import LlamaStackImageType -from llama_stack.providers.datatypes import Api TEMPLATES_PATH = Path(__file__).parent.parent.parent / "distributions" diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index fb3a22109..27ded7ede 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -6,6 +6,7 @@ import sys +from llama_stack_api import Api from pydantic import BaseModel from termcolor import cprint @@ -13,7 +14,6 @@ from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api log = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/client.py b/src/llama_stack/core/client.py index 49e01794e..41acacdb5 100644 --- a/src/llama_stack/core/client.py +++ b/src/llama_stack/core/client.py @@ -12,11 +12,10 @@ from enum import Enum from typing import Any, Union, get_args, get_origin import httpx +from llama_stack_api import RemoteProviderConfig from pydantic import BaseModel, parse_obj_as from 
termcolor import cprint -from llama_stack.providers.datatypes import RemoteProviderConfig - _CLIENT_CLASSES = {} diff --git a/src/llama_stack/core/configure.py b/src/llama_stack/core/configure.py index 5d4a54184..bdb3b9734 100644 --- a/src/llama_stack/core/configure.py +++ b/src/llama_stack/core/configure.py @@ -6,6 +6,8 @@ import textwrap from typing import Any +from llama_stack_api import Api, ProviderSpec + from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, DistributionSpec, @@ -20,7 +22,6 @@ from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.core.utils.prompt_for_config import prompt_for_config from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, ProviderSpec logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index f83834522..b94cd4fdd 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -8,9 +8,7 @@ import secrets import time from typing import Any, Literal -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.conversations.conversations import ( +from llama_stack_api import ( Conversation, ConversationDeletedResource, ConversationItem, @@ -20,6 +18,8 @@ from llama_stack.apis.conversations.conversations import ( Conversations, Metadata, ) +from pydantic import BaseModel, TypeAdapter + from llama_stack.core.datatypes import AccessRule, StackRunConfig from llama_stack.log import get_logger from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType diff --git a/src/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py index 2182ea4e5..4231363b6 100644 --- a/src/llama_stack/core/datatypes.py +++ b/src/llama_stack/core/datatypes.py @@ -9,22 +9,34 @@ from pathlib import Path from typing import Annotated, Any, Literal, Self from urllib.parse import urlparse +from llama_stack_api import ( + Api, + Benchmark, + BenchmarkInput, + Dataset, + DatasetInput, + DatasetIO, + Eval, + Inference, + Model, + ModelInput, + ProviderSpec, + Resource, + Safety, + Scoring, + ScoringFn, + ScoringFnInput, + Shield, + ShieldInput, + ToolGroup, + ToolGroupInput, + ToolRuntime, + VectorIO, + VectorStore, + VectorStoreInput, +) from pydantic import BaseModel, Field, field_validator, model_validator -from llama_stack.apis.benchmarks import Benchmark, BenchmarkInput -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset, DatasetInput -from llama_stack.apis.eval import Eval -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Model, ModelInput -from llama_stack.apis.resource import Resource -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnInput -from llama_stack.apis.shields import Shield, ShieldInput -from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore, VectorStoreInput from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( KVStoreReference, @@ -32,7 +44,6 @@ from llama_stack.core.storage.datatypes import ( StorageConfig, ) from llama_stack.log 
import LoggingConfig -from llama_stack.providers.datatypes import Api, ProviderSpec LLAMA_STACK_BUILD_CONFIG_VERSION = 2 LLAMA_STACK_RUN_CONFIG_VERSION = 2 diff --git a/src/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py index 9be5ffb49..162f9f2b0 100644 --- a/src/llama_stack/core/distribution.py +++ b/src/llama_stack/core/distribution.py @@ -10,17 +10,17 @@ import os from typing import Any import yaml -from pydantic import BaseModel - -from llama_stack.core.datatypes import BuildConfig, DistributionSpec -from llama_stack.core.external import load_external_apis -from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( +from llama_stack_api import ( Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec, ) +from pydantic import BaseModel + +from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.external import load_external_apis +from llama_stack.log import get_logger logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/external.py b/src/llama_stack/core/external.py index 12e9824ad..ce0c7eb72 100644 --- a/src/llama_stack/core/external.py +++ b/src/llama_stack/core/external.py @@ -6,8 +6,8 @@ import yaml +from llama_stack_api import Api, ExternalApiSpec -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.datatypes import BuildConfig, StackRunConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py index 07b51128f..53ddd3475 100644 --- a/src/llama_stack/core/inspect.py +++ b/src/llama_stack/core/inspect.py @@ -6,19 +6,19 @@ from importlib.metadata import version -from pydantic import BaseModel - -from llama_stack.apis.inspect import ( +from llama_stack_api import ( HealthInfo, + HealthStatus, Inspect, ListRoutesResponse, RouteInfo, VersionInfo, ) +from pydantic import BaseModel + from llama_stack.core.datatypes import StackRunConfig from llama_stack.core.external import load_external_apis from llama_stack.core.server.routes import get_all_api_routes -from llama_stack.providers.datatypes import HealthStatus class DistributionInspectConfig(BaseModel): diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index db990368b..959284720 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -18,6 +18,7 @@ from typing import Any, TypeVar, Union, get_args, get_origin import httpx import yaml from fastapi import Response as FastAPIResponse +from llama_stack_api import is_unwrapped_body_param try: from llama_stack_client import ( @@ -57,7 +58,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.core.utils.exec import in_notebook from llama_stack.log import get_logger, setup_logging -from llama_stack.strong_typing.inspection import is_unwrapped_body_param logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py index 1a6f38cb5..d9532b978 100644 --- a/src/llama_stack/core/prompts/prompts.py +++ b/src/llama_stack/core/prompts/prompts.py @@ -7,9 +7,9 @@ import json from typing import Any +from llama_stack_api import ListPromptsResponse, Prompt, Prompts from pydantic import BaseModel -from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import 
StackRunConfig from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl diff --git a/src/llama_stack/core/providers.py b/src/llama_stack/core/providers.py index 7095ffd18..7337d9e35 100644 --- a/src/llama_stack/core/providers.py +++ b/src/llama_stack/core/providers.py @@ -7,11 +7,10 @@ import asyncio from typing import Any +from llama_stack_api import HealthResponse, HealthStatus, ListProvidersResponse, ProviderInfo, Providers from pydantic import BaseModel -from llama_stack.apis.providers import ListProvidersResponse, ProviderInfo, Providers from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus from .datatypes import StackRunConfig from .utils.config import redact_sensitive_fields diff --git a/src/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py index 8bf371fed..ca154fbc6 100644 --- a/src/llama_stack/core/resolver.py +++ b/src/llama_stack/core/resolver.py @@ -8,29 +8,46 @@ import importlib.metadata import inspect from typing import Any -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.datatypes import ExternalApiSpec -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InferenceProvider -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers as ProvidersAPI -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO -from llama_stack.apis.vector_stores import VectorStore -from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA +from llama_stack_api import ( + LLAMA_STACK_API_V1ALPHA, + Agents, + Api, + Batches, + Benchmarks, + BenchmarksProtocolPrivate, + Conversations, + DatasetIO, + Datasets, + DatasetsProtocolPrivate, + Eval, + ExternalApiSpec, + Files, + Inference, + InferenceProvider, + Inspect, + Models, + ModelsProtocolPrivate, + PostTraining, + Prompts, + ProviderSpec, + RemoteProviderConfig, + RemoteProviderSpec, + Safety, + Scoring, + ScoringFunctions, + ScoringFunctionsProtocolPrivate, + Shields, + ShieldsProtocolPrivate, + ToolGroups, + ToolGroupsProtocolPrivate, + ToolRuntime, + VectorIO, + VectorStore, +) +from llama_stack_api import ( + Providers as ProvidersAPI, +) + from llama_stack.core.client import get_client_impl from llama_stack.core.datatypes import ( AccessRule, @@ -44,18 +61,6 @@ from llama_stack.core.external import load_external_apis from llama_stack.core.store import DistributionRegistry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ( - Api, - BenchmarksProtocolPrivate, - DatasetsProtocolPrivate, - ModelsProtocolPrivate, - ProviderSpec, - RemoteProviderConfig, - RemoteProviderSpec, - ScoringFunctionsProtocolPrivate, - ShieldsProtocolPrivate, - ToolGroupsProtocolPrivate, -) logger = 
get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py index 729d1c9ea..c2d051422 100644 --- a/src/llama_stack/core/routers/__init__.py +++ b/src/llama_stack/core/routers/__init__.py @@ -6,13 +6,14 @@ from typing import Any +from llama_stack_api import Api, RoutingTable + from llama_stack.core.datatypes import ( AccessRule, RoutedProtocol, ) from llama_stack.core.stack import StackRunConfig from llama_stack.core.store import DistributionRegistry -from llama_stack.providers.datatypes import Api, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore diff --git a/src/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py index 2f1d5f78e..dcf247874 100644 --- a/src/llama_stack/core/routers/datasets.py +++ b/src/llama_stack/core/routers/datasets.py @@ -6,11 +6,9 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import DatasetPurpose, DataSource +from llama_stack_api import DatasetIO, DatasetPurpose, DataSource, PaginatedResponse, RoutingTable + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py index ffca81bf0..cbbbf5cc5 100644 --- a/src/llama_stack/core/routers/eval_scoring.py +++ b/src/llama_stack/core/routers/eval_scoring.py @@ -6,15 +6,19 @@ from typing import Any -from llama_stack.apis.eval import BenchmarkConfig, Eval, EvaluateResponse, Job -from llama_stack.apis.scoring import ( +from llama_stack_api import ( + BenchmarkConfig, + Eval, + EvaluateResponse, + Job, + RoutingTable, ScoreBatchResponse, ScoreResponse, Scoring, ScoringFnParams, ) + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index d6270d428..a538ab02e 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -11,17 +11,19 @@ from datetime import UTC, datetime from typing import Annotated, Any from fastapi import Body -from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam -from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.inference import ( +from llama_stack_api import ( + HealthResponse, + HealthStatus, Inference, ListOpenAIChatCompletionResponse, + ModelNotFoundError, + ModelType, + ModelTypeError, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, @@ -35,18 +37,17 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, RerankResponse, + RoutingTable, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, -) -from llama_stack.apis.models import 
ModelType +from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatCompletionToolChoiceOptionParam +from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam +from pydantic import TypeAdapter + from llama_stack.core.telemetry.telemetry import MetricEvent from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span from llama_stack.log import get_logger from llama_stack.models.llama.llama3.chat_format import ChatFormat from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable from llama_stack.providers.utils.inference.inference_store import InferenceStore logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py index e5ff2ada9..f85bbb767 100644 --- a/src/llama_stack/core/routers/safety.py +++ b/src/llama_stack/core/routers/safety.py @@ -6,13 +6,10 @@ from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import RunShieldResponse, Safety -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield +from llama_stack_api import ModerationObject, OpenAIMessageParam, RoutingTable, RunShieldResponse, Safety, Shield + from llama_stack.core.datatypes import SafetyConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py index 3cfe584c5..c67ffd11a 100644 --- a/src/llama_stack/core/routers/tool_runtime.py +++ b/src/llama_stack/core/routers/tool_runtime.py @@ -6,13 +6,12 @@ from typing import Any -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( URL, -) -from llama_stack.apis.tools import ( ListToolDefsResponse, ToolRuntime, ) + from llama_stack.log import get_logger from ..routing_tables.toolgroups import ToolGroupsRoutingTable diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index ed5fb8253..bfd090e32 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -9,14 +9,16 @@ import uuid from typing import Annotated, Any from fastapi import Body - -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.models import ModelType -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + HealthResponse, + HealthStatus, + InterleavedContent, + ModelType, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, QueryChunksResponse, + RoutingTable, SearchRankingOptions, VectorIO, VectorStoreChunkingStrategy, @@ -33,9 +35,9 @@ from llama_stack.apis.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import VectorStoresConfig from llama_stack.log import get_logger -from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable logger = get_logger(name=__name__, category="core::routers") diff --git a/src/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py index 8c87d395d..66830bc41 100644 --- a/src/llama_stack/core/routing_tables/benchmarks.py +++ b/src/llama_stack/core/routing_tables/benchmarks.py 
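One detail worth calling out across these router hunks: each one adds a blank line after the new import block. That is isort/ruff at work. Because `llama_stack_api` now ships as a separate distribution, it sorts into the third-party group, above the first-party `llama_stack.*` block, with the standard blank line between groups. Sketched on a hypothetical module (names borrowed from the hunks above, not a real file):

```python
# Third-party group: alphabetized, so llama_stack_api lands alongside
# packages like pydantic rather than with the first-party imports.
from llama_stack_api import HealthResponse, HealthStatus, RoutingTable
from pydantic import BaseModel

# First-party group: whatever still lives in the llama_stack tree.
from llama_stack.core.datatypes import StackRunConfig
from llama_stack.log import get_logger
```

The same grouping explains why several hunks below appear to "move" unrelated third-party imports: ruff is re-sorting them around the new package.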
@@ -6,7 +6,8 @@ from typing import Any -from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse +from llama_stack_api import Benchmark, Benchmarks, ListBenchmarksResponse + from llama_stack.core.datatypes import ( BenchmarkWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py index d6faf93c5..cfbafc9a8 100644 --- a/src/llama_stack/core/routing_tables/common.py +++ b/src/llama_stack/core/routing_tables/common.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import Model -from llama_stack.apis.resource import ResourceType +from llama_stack_api import Api, Model, ModelNotFoundError, ResourceType, RoutingTable + from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed from llama_stack.core.access_control.datatypes import Action from llama_stack.core.datatypes import ( @@ -21,7 +20,6 @@ from llama_stack.core.datatypes import ( from llama_stack.core.request_headers import get_authenticated_user from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api, RoutingTable logger = get_logger(name=__name__, category="core::routing_tables") diff --git a/src/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py index b129c9ec5..c49c9769b 100644 --- a/src/llama_stack/core/routing_tables/datasets.py +++ b/src/llama_stack/core/routing_tables/datasets.py @@ -7,18 +7,19 @@ import uuid from typing import Any -from llama_stack.apis.common.errors import DatasetNotFoundError -from llama_stack.apis.datasets import ( +from llama_stack_api import ( Dataset, + DatasetNotFoundError, DatasetPurpose, Datasets, DatasetType, DataSource, ListDatasetsResponse, + ResourceType, RowsDataSource, URIDataSource, ) -from llama_stack.apis.resource import ResourceType + from llama_stack.core.datatypes import ( DatasetWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py index 1fb1186cd..e1210a139 100644 --- a/src/llama_stack/core/routing_tables/models.py +++ b/src/llama_stack/core/routing_tables/models.py @@ -7,8 +7,16 @@ import time from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError -from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel +from llama_stack_api import ( + ListModelsResponse, + Model, + ModelNotFoundError, + Models, + ModelType, + OpenAIListModelsResponse, + OpenAIModel, +) + from llama_stack.core.datatypes import ( ModelWithOwner, RegistryEntrySource, diff --git a/src/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py index 520f07014..66165ac2f 100644 --- a/src/llama_stack/core/routing_tables/scoring_functions.py +++ b/src/llama_stack/core/routing_tables/scoring_functions.py @@ -4,14 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
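Before the remaining routing tables, a cheap way to validate a sweep like this is to probe the flat namespace for every name the routers and routing tables above now expect from it. A stdlib-only sketch (assumes `llama_stack_api` is importable in the active environment; the name list is sampled from the hunks, not exhaustive):

```python
# Probe that llama_stack_api re-exports the names the diffs above import.
import importlib

SAMPLED_NAMES = [
    "Api", "Benchmark", "Benchmarks", "BenchmarkConfig", "DatasetIO",
    "EvaluateResponse", "HealthResponse", "HealthStatus",
    "ListBenchmarksResponse", "ModelNotFoundError", "ModelType",
    "PaginatedResponse", "ResourceType", "RoutingTable",
    "RunShieldResponse", "Safety", "VectorIO",
]

api = importlib.import_module("llama_stack_api")
missing = [name for name in SAMPLED_NAMES if not hasattr(api, name)]
if missing:
    raise SystemExit(f"llama_stack_api is missing re-exports: {missing}")
print(f"all {len(SAMPLED_NAMES)} sampled names resolve")
```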
-from llama_stack.apis.common.type_system import ParamType -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.scoring_functions import ( +from llama_stack_api import ( ListScoringFunctionsResponse, + ParamType, + ResourceType, ScoringFn, ScoringFnParams, ScoringFunctions, ) + from llama_stack.core.datatypes import ( ScoringFnWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py index b1918d20a..0f981c49d 100644 --- a/src/llama_stack/core/routing_tables/shields.py +++ b/src/llama_stack/core/routing_tables/shields.py @@ -6,8 +6,8 @@ from typing import Any -from llama_stack.apis.resource import ResourceType -from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields +from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields + from llama_stack.core.datatypes import ( ShieldWithOwner, ) diff --git a/src/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py index 7f5ddd2b1..bbef384c2 100644 --- a/src/llama_stack/core/routing_tables/toolgroups.py +++ b/src/llama_stack/core/routing_tables/toolgroups.py @@ -6,9 +6,16 @@ from typing import Any -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.common.errors import ToolGroupNotFoundError -from llama_stack.apis.tools import ListToolDefsResponse, ListToolGroupsResponse, ToolDef, ToolGroup, ToolGroups +from llama_stack_api import ( + URL, + ListToolDefsResponse, + ListToolGroupsResponse, + ToolDef, + ToolGroup, + ToolGroupNotFoundError, + ToolGroups, +) + from llama_stack.core.datatypes import AuthenticationRequiredError, ToolGroupWithOwner from llama_stack.log import get_logger diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index e77739abe..f95463b3c 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -6,12 +6,12 @@ from typing import Any -from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError -from llama_stack.apis.models import ModelType -from llama_stack.apis.resource import ResourceType - # Removed VectorStores import to avoid exposing public API -from llama_stack.apis.vector_io.vector_io import ( +from llama_stack_api import ( + ModelNotFoundError, + ModelType, + ModelTypeError, + ResourceType, SearchRankingOptions, VectorStoreChunkingStrategy, VectorStoreDeleteResponse, @@ -22,6 +22,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) + from llama_stack.core.datatypes import ( VectorStoreWithOwner, ) diff --git a/src/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py index da398bf99..a7f5d7916 100644 --- a/src/llama_stack/core/server/auth_providers.py +++ b/src/llama_stack/core/server/auth_providers.py @@ -11,9 +11,9 @@ from urllib.parse import parse_qs, urljoin, urlparse import httpx import jwt +from llama_stack_api import TokenValidationError from pydantic import BaseModel, Field -from llama_stack.apis.common.errors import TokenValidationError from llama_stack.core.datatypes import ( AuthenticationConfig, CustomAuthConfig, diff --git a/src/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py index 4f7ff2295..e7a84937d 100644 --- a/src/llama_stack/core/server/routes.py +++ b/src/llama_stack/core/server/routes.py @@ -10,11 +10,10 @@ from collections.abc import Callable 
from typing import Any from aiohttp import hdrs +from llama_stack_api import Api, ExternalApiSpec, WebMethod from starlette.routing import Route -from llama_stack.apis.datatypes import Api, ExternalApiSpec from llama_stack.core.resolver import api_protocol_map -from llama_stack.schema_utils import WebMethod EndpointFunc = Callable[..., Any] PathParams = dict[str, str] diff --git a/src/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py index 5bf876c02..8116348ec 100644 --- a/src/llama_stack/core/server/server.py +++ b/src/llama_stack/core/server/server.py @@ -28,11 +28,10 @@ from fastapi import Path as FastapiPath from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse +from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError from openai import BadRequestError from pydantic import BaseModel, ValidationError -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.access_control.access_control import AccessDeniedError from llama_stack.core.datatypes import ( AuthenticationRequiredError, @@ -58,7 +57,6 @@ from llama_stack.core.utils.config import redact_sensitive_fields from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import LoggingConfig, get_logger, setup_logging -from llama_stack.providers.datatypes import Api from .auth import AuthenticationMiddleware from .quota import QuotaMiddleware diff --git a/src/llama_stack/core/stack.py b/src/llama_stack/core/stack.py index 2ed0eccd2..674c35f31 100644 --- a/src/llama_stack/core/stack.py +++ b/src/llama_stack/core/stack.py @@ -12,27 +12,31 @@ import tempfile from typing import Any import yaml +from llama_stack_api import ( + Agents, + Api, + Batches, + Benchmarks, + Conversations, + DatasetIO, + Datasets, + Eval, + Files, + Inference, + Inspect, + Models, + PostTraining, + Prompts, + Providers, + Safety, + Scoring, + ScoringFunctions, + Shields, + ToolGroups, + ToolRuntime, + VectorIO, +) -from llama_stack.apis.agents import Agents -from llama_stack.apis.batches import Batches -from llama_stack.apis.benchmarks import Benchmarks -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.eval import Eval -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.inspect import Inspect -from llama_stack.apis.models import Models -from llama_stack.apis.post_training import PostTraining -from llama_stack.apis.prompts import Prompts -from llama_stack.apis.providers import Providers -from llama_stack.apis.safety import Safety -from llama_stack.apis.scoring import Scoring -from llama_stack.apis.scoring_functions import ScoringFunctions -from llama_stack.apis.shields import Shields -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl from llama_stack.core.datatypes import Provider, SafetyConfig, StackRunConfig, VectorStoresConfig from llama_stack.core.distribution import get_provider_registry @@ -54,7 +58,6 @@ from 
llama_stack.core.storage.datatypes import ( from llama_stack.core.store.registry import create_dist_registry from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Api logger = get_logger(name=__name__, category="core") diff --git a/src/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py index 459c1aa1a..1a56277ea 100644 --- a/src/llama_stack/core/telemetry/telemetry.py +++ b/src/llama_stack/core/telemetry/telemetry.py @@ -16,6 +16,7 @@ from typing import ( cast, ) +from llama_stack_api import json_schema_type, register_schema from opentelemetry import metrics, trace from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter @@ -28,7 +29,6 @@ from pydantic import BaseModel, Field from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Primitive -from llama_stack.schema_utils import json_schema_type, register_schema ROOT_SPAN_MARKERS = ["__root__", "__root_span__"] diff --git a/src/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py index 88e72688f..fd76e3ccb 100644 --- a/src/llama_stack/distributions/dell/dell.py +++ b/src/llama_stack/distributions/dell/dell.py @@ -4,7 +4,8 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py index 4e4ddef33..67af0e92a 100644 --- a/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py +++ b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py @@ -6,7 +6,8 @@ from pathlib import Path -from llama_stack.apis.models import ModelType +from llama_stack_api import ModelType + from llama_stack.core.datatypes import ( BuildProvider, ModelInput, diff --git a/src/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py index 2b7760894..59deca6d0 100644 --- a/src/llama_stack/distributions/open-benchmark/open_benchmark.py +++ b/src/llama_stack/distributions/open-benchmark/open_benchmark.py @@ -5,8 +5,8 @@ # the root directory of this source tree. 
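In the distribution templates, `ModelType` is the lone symbol that crosses over to the flat namespace; the `ModelInput`/`BuildProvider` plumbing stays first-party. A minimal sketch of the declaration pattern those templates share (the ids are placeholders, and the `ModelInput` field names are taken on trust from `llama_stack.core.datatypes`):

```python
from llama_stack_api import ModelType

from llama_stack.core.datatypes import ModelInput

# An embedding-model entry as the distribution templates declare them.
embedding_model = ModelInput(
    model_id="all-MiniLM-L6-v2",          # placeholder id
    provider_id="sentence-transformers",  # placeholder provider
    model_type=ModelType.embedding,
)
```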
-from llama_stack.apis.datasets import DatasetPurpose, URIDataSource -from llama_stack.apis.models import ModelType +from llama_stack_api import DatasetPurpose, ModelType, URIDataSource + from llama_stack.core.datatypes import ( BenchmarkInput, BuildProvider, diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 7b7773289..1a8126290 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -7,6 +7,8 @@ from typing import Any +from llama_stack_api import RemoteProviderSpec + from llama_stack.core.datatypes import ( BuildProvider, Provider, @@ -19,7 +21,6 @@ from llama_stack.core.datatypes import ( ) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings -from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py index e6813806a..faf5fb085 100644 --- a/src/llama_stack/distributions/template.py +++ b/src/llama_stack/distributions/template.py @@ -10,10 +10,9 @@ from typing import Any, Literal import jinja2 import rich import yaml +from llama_stack_api import DatasetPurpose, ModelType from pydantic import BaseModel, Field -from llama_stack.apis.datasets import DatasetPurpose -from llama_stack.apis.models import ModelType from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py index 880e0b680..025fcc676 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -5,25 +5,26 @@ # the root directory of this source tree. 
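The open-benchmark template above also pulls its dataset vocabulary from the flat namespace. Roughly, pairing a purpose with a source looks like this (a sketch: the URI is invented, and the enum member name is assumed from the string values `DatasetPurpose` carries):

```python
from llama_stack_api import DatasetPurpose, URIDataSource

# A benchmark dataset entry pairs an evaluation purpose with a URI source.
purpose = DatasetPurpose.eval_messages_answer  # member name assumed
source = URIDataSource(uri="huggingface://datasets/example/eval-set")  # placeholder
print(purpose.value, source.uri)
```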
-from llama_stack.apis.agents import ( +from llama_stack_api import ( Agents, + Conversations, + Inference, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseObject, + OpenAIResponsePrompt, + OpenAIResponseText, Order, + ResponseGuardrail, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.agents.agents import ResponseGuardrail -from llama_stack.apis.agents.openai_responses import OpenAIResponsePrompt, OpenAIResponseText -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.inference import ( - Inference, -) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.datatypes import AccessRule from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py index dbf7bedff..0298d09f7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py @@ -8,14 +8,15 @@ import time import uuid from collections.abc import AsyncIterator -from pydantic import BaseModel, TypeAdapter - -from llama_stack.apis.agents import Order -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ConversationItem, + Conversations, + Inference, + InvalidConversationIdError, ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseInputMessageContentText, OpenAIResponseInputTool, @@ -25,20 +26,16 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponsePrompt, OpenAIResponseText, OpenAIResponseTextFormat, -) -from llama_stack.apis.common.errors import ( - InvalidConversationIdError, -) -from llama_stack.apis.conversations import Conversations -from llama_stack.apis.conversations.conversations import ConversationItem -from llama_stack.apis.inference import ( - Inference, - OpenAIMessageParam, OpenAISystemMessageParam, + Order, + ResponseGuardrailSpec, + Safety, + ToolGroups, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.safety import Safety -from llama_stack.apis.tools import ToolGroups, ToolRuntime -from llama_stack.apis.vector_io import VectorIO +from pydantic import BaseModel, TypeAdapter + from llama_stack.log import get_logger from llama_stack.providers.utils.responses.responses_store import ( ResponsesStore, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py index 3fd603064..910117bdb 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py @@ -8,10 +8,18 @@ import uuid from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( AllowedToolsFilter, ApprovalFilter, + Inference, MCPListToolsTool, + OpenAIAssistantMessageParam, + OpenAIChatCompletion, + 
OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, + OpenAIChatCompletionToolCall, + OpenAIChoice, + OpenAIMessageParam, OpenAIResponseContentPartOutputText, OpenAIResponseContentPartReasoningText, OpenAIResponseContentPartRefusal, @@ -56,16 +64,7 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseUsageOutputTokensDetails, WebSearchToolTypes, ) -from llama_stack.apis.inference import ( - Inference, - OpenAIAssistantMessageParam, - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAIChatCompletionRequestWithExtraBody, - OpenAIChatCompletionToolCall, - OpenAIChoice, - OpenAIMessageParam, -) + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str @@ -1023,9 +1022,9 @@ class StreamingResponseOrchestrator: self, tools: list[OpenAIResponseInputTool], output_messages: list[OpenAIResponseOutput] ) -> AsyncIterator[OpenAIResponseObjectStream]: """Process all tools and emit appropriate streaming events.""" + from llama_stack_api import ToolDef from openai.types.chat import ChatCompletionToolParam - from llama_stack.apis.tools import ToolDef from llama_stack.models.llama.datatypes import ToolDefinition from llama_stack.providers.utils.inference.openai_compat import convert_tooldef_to_openai_tool diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py index 47ca500a9..e5f2d3cf7 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py @@ -9,7 +9,12 @@ import json from collections.abc import AsyncIterator from typing import Any -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + ImageContentItem, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIImageURL, OpenAIResponseInputToolFileSearch, OpenAIResponseInputToolMCP, OpenAIResponseObjectStreamResponseFileSearchCallCompleted, @@ -23,22 +28,15 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObjectStreamResponseWebSearchCallSearching, OpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseOutputMessageFileSearchToolCallResults, - OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageWebSearchToolCall, -) -from llama_stack.apis.common.content_types import ( - ImageContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIImageURL, OpenAIToolMessageParam, + TextContentItem, + ToolGroups, + ToolInvocationResult, + ToolRuntime, + VectorIO, ) -from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime -from llama_stack.apis.vector_io import VectorIO + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger @@ -400,6 +398,10 @@ class ToolExecutor: # Build output message message: Any if mcp_tool_to_server and function.name in mcp_tool_to_server: + from llama_stack_api import ( + OpenAIResponseOutputMessageMCPCall, + ) + message = OpenAIResponseOutputMessageMCPCall( id=item_id, arguments=function.arguments, diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py 
b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py index 3b9a14b01..35ad03378 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py @@ -7,10 +7,10 @@ from dataclasses import dataclass from typing import cast -from openai.types.chat import ChatCompletionToolParam -from pydantic import BaseModel - -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIChatCompletionToolCall, + OpenAIMessageParam, + OpenAIResponseFormatParam, OpenAIResponseInput, OpenAIResponseInputTool, OpenAIResponseInputToolFileSearch, @@ -26,7 +26,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseTool, OpenAIResponseToolMCP, ) -from llama_stack.apis.inference import OpenAIChatCompletionToolCall, OpenAIMessageParam, OpenAIResponseFormatParam +from openai.types.chat import ChatCompletionToolParam +from pydantic import BaseModel class ToolExecutionResult(BaseModel): diff --git a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py index 26af1d595..943bbae41 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py @@ -9,9 +9,23 @@ import re import uuid from collections.abc import Sequence -from llama_stack.apis.agents.agents import ResponseGuardrailSpec -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( + OpenAIAssistantMessageParam, + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartParam, + OpenAIChatCompletionContentPartTextParam, + OpenAIChatCompletionToolCall, + OpenAIChatCompletionToolCallFunction, + OpenAIChoice, + OpenAIDeveloperMessageParam, + OpenAIImageURL, + OpenAIJSONSchema, + OpenAIMessageParam, OpenAIResponseAnnotationFileCitation, + OpenAIResponseFormatJSONObject, + OpenAIResponseFormatJSONSchema, + OpenAIResponseFormatParam, + OpenAIResponseFormatText, OpenAIResponseInput, OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputMessageContent, @@ -27,28 +41,12 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPListTools, OpenAIResponseText, -) -from llama_stack.apis.inference import ( - OpenAIAssistantMessageParam, - OpenAIChatCompletionContentPartImageParam, - OpenAIChatCompletionContentPartParam, - OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionToolCall, - OpenAIChatCompletionToolCallFunction, - OpenAIChoice, - OpenAIDeveloperMessageParam, - OpenAIImageURL, - OpenAIJSONSchema, - OpenAIMessageParam, - OpenAIResponseFormatJSONObject, - OpenAIResponseFormatJSONSchema, - OpenAIResponseFormatParam, - OpenAIResponseFormatText, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResponseGuardrailSpec, + Safety, ) -from llama_stack.apis.safety import Safety async def convert_chat_choice_to_response_message( diff --git a/src/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py index f0ae51423..dd90ac298 100644 --- a/src/llama_stack/providers/inline/agents/meta_reference/safety.py +++ b/src/llama_stack/providers/inline/agents/meta_reference/safety.py @@ -6,8 +6,8 @@ import asyncio -from llama_stack.apis.inference import OpenAIMessageParam -from 
llama_stack.apis.safety import Safety, SafetyViolation, ViolationLevel +from llama_stack_api import OpenAIMessageParam, Safety, SafetyViolation, ViolationLevel + from llama_stack.core.telemetry import tracing from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py index a8ae92eb2..27d0f4213 100644 --- a/src/llama_stack/providers/inline/batches/reference/__init__.py +++ b/src/llama_stack/providers/inline/batches/reference/__init__.py @@ -6,9 +6,8 @@ from typing import Any -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.models import Models +from llama_stack_api import Files, Inference, Models + from llama_stack.core.datatypes import AccessRule, Api from llama_stack.providers.utils.kvstore import kvstore_impl diff --git a/src/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py index 7c4358b84..f0f8da96c 100644 --- a/src/llama_stack/providers/inline/batches/reference/batches.py +++ b/src/llama_stack/providers/inline/batches/reference/batches.py @@ -13,25 +13,29 @@ import uuid from io import BytesIO from typing import Any, Literal -from openai.types.batch import BatchError, Errors -from pydantic import BaseModel - -from llama_stack.apis.batches import Batches, BatchObject, ListBatchesResponse -from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError -from llama_stack.apis.files import Files, OpenAIFilePurpose -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Batches, + BatchObject, + ConflictError, + Files, Inference, + ListBatchesResponse, + Models, OpenAIAssistantMessageParam, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAIDeveloperMessageParam, OpenAIEmbeddingsRequestWithExtraBody, + OpenAIFilePurpose, OpenAIMessageParam, OpenAISystemMessageParam, OpenAIToolMessageParam, OpenAIUserMessageParam, + ResourceNotFoundError, ) -from llama_stack.apis.models import Models +from openai.types.batch import BatchError, Errors +from pydantic import BaseModel + from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore diff --git a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py index e8ebeb30d..1fcfbbef4 100644 --- a/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -5,10 +5,8 @@ # the root directory of this source tree. 
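The batches provider shows how mixed the old import surface was: API protocols (`Batches`, `Files`, `Inference`, `Models`), request/response models, and the shared error types all arrive in one statement now. The errors behave as before; a hedged sketch with a hypothetical lookup (the constructor argument shape shown here is an assumption, not confirmed by this diff):

```python
from llama_stack_api import ResourceNotFoundError

def get_batch_or_raise(store: dict[str, object], batch_id: str) -> object:
    """Hypothetical lookup; only the error's import origin changed in this PR."""
    if batch_id not in store:
        # Argument shape (name, resource type, list-method hint) is assumed.
        raise ResourceNotFoundError(batch_id, "Batch", "client.batches.list")
    return store[batch_id]
```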
from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Dataset -from llama_stack.providers.datatypes import DatasetsProtocolPrivate +from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse + from llama_stack.providers.utils.datasetio.url_utils import get_dataframe_from_uri from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.pagination import paginate_records diff --git a/src/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py index 5ddbd56c5..e6020e8a3 100644 --- a/src/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/src/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -6,26 +6,29 @@ import json from typing import Any -from tqdm import tqdm - -from llama_stack.apis.agents import Agents -from llama_stack.apis.benchmarks import Benchmark -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.inference import ( +from llama_stack_api import ( + Agents, + Benchmark, + BenchmarkConfig, + BenchmarksProtocolPrivate, + DatasetIO, + Datasets, + Eval, + EvaluateResponse, Inference, + Job, + JobStatus, OpenAIChatCompletionRequestWithExtraBody, OpenAICompletionRequestWithExtraBody, OpenAISystemMessageParam, OpenAIUserMessageParam, + Scoring, ) -from llama_stack.apis.scoring import Scoring -from llama_stack.providers.datatypes import BenchmarksProtocolPrivate +from tqdm import tqdm + from llama_stack.providers.utils.common.data_schema_validator import ColumnName from llama_stack.providers.utils.kvstore import kvstore_impl -from .....apis.common.job_types import Job, JobStatus -from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse from .config import MetaReferenceEvalConfig EVAL_TASKS_PREFIX = "benchmarks:" diff --git a/src/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py index a76b982ce..5e8c887f1 100644 --- a/src/llama_stack/providers/inline/files/localfs/files.py +++ b/src/llama_stack/providers/inline/files/localfs/files.py @@ -10,17 +10,17 @@ from pathlib import Path from typing import Annotated from fastapi import Depends, File, Form, Response, UploadFile - -from llama_stack.apis.common.errors import ResourceNotFoundError -from llama_stack.apis.common.responses import Order -from llama_stack.apis.files import ( +from llama_stack_api import ( ExpiresAfter, Files, ListOpenAIFileResponse, OpenAIFileDeleteResponse, OpenAIFileObject, OpenAIFilePurpose, + Order, + ResourceNotFoundError, ) + from llama_stack.core.datatypes import AccessRule from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py index 961548f9c..802e79f15 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/config.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/config.py @@ -6,9 +6,9 @@ from typing import Any +from llama_stack_api import QuantizationConfig from pydantic import BaseModel, field_validator -from llama_stack.apis.inference import QuantizationConfig from llama_stack.providers.utils.inference import supported_inference_models diff --git 
a/src/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py index 51a2ddfad..2155a1ae8 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/generators.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/generators.py @@ -8,9 +8,7 @@ import math from typing import Optional import torch -from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData - -from llama_stack.apis.inference import ( +from llama_stack_api import ( GreedySamplingStrategy, JsonSchemaResponseFormat, OpenAIChatCompletionRequestWithExtraBody, @@ -20,6 +18,8 @@ from llama_stack.apis.inference import ( SamplingParams, TopPSamplingStrategy, ) +from lmformatenforcer import JsonSchemaParser, TokenEnforcer, TokenEnforcerTokenizerData + from llama_stack.models.llama.datatypes import QuantizationMode, ToolPromptFormat from llama_stack.models.llama.llama3.generation import Llama3 from llama_stack.models.llama.llama3.tokenizer import Tokenizer as Llama3Tokenizer diff --git a/src/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py index ef21132a0..753185fe7 100644 --- a/src/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/src/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -9,22 +9,23 @@ import time import uuid from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, + Model, + ModelsProtocolPrivate, + ModelType, OpenAIAssistantMessageParam, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, OpenAIChatCompletionUsage, OpenAIChoice, + OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIUserMessageParam, ToolChoice, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.apis.models import Model, ModelType + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import RawMessage, RawTextItem, ToolDefinition from llama_stack.models.llama.llama3.chat_format import ChatFormat as Llama3ChatFormat @@ -40,7 +41,6 @@ from llama_stack.models.llama.llama4.prompt_templates.system_prompts import ( from llama_stack.models.llama.llama4.tokenizer import Tokenizer as Llama4Tokenizer from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import ModelFamily, is_multimodal -from llama_stack.providers.datatypes import ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) @@ -376,7 +376,7 @@ class MetaReferenceInferenceImpl( # Convert tool calls to OpenAI format openai_tool_calls = None if decoded_message.tool_calls: - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, ) @@ -441,13 +441,14 @@ class MetaReferenceInferenceImpl( params: OpenAIChatCompletionRequestWithExtraBody, ) -> AsyncIterator[OpenAIChatCompletionChunk]: """Stream chat completion chunks as they're generated.""" - from llama_stack.apis.inference import ( + from llama_stack_api import ( OpenAIChatCompletionChunk, OpenAIChatCompletionToolCall, OpenAIChatCompletionToolCallFunction, OpenAIChoiceDelta, OpenAIChunkChoice, ) + from llama_stack.models.llama.datatypes 
import StopReason from llama_stack.providers.utils.inference.prompt_adapter import decode_assistant_message diff --git a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py index e6dcf3ae7..14c9a41a4 100644 --- a/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py +++ b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py @@ -6,19 +6,19 @@ from collections.abc import AsyncIterator -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, - OpenAIChatCompletionRequestWithExtraBody, - OpenAICompletionRequestWithExtraBody, -) -from llama_stack.apis.inference.inference import ( + Model, + ModelsProtocolPrivate, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatCompletionRequestWithExtraBody, OpenAICompletion, + OpenAICompletionRequestWithExtraBody, ) -from llama_stack.apis.models import ModelType + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference.embedding_mixin import ( SentenceTransformerEmbeddingMixin, ) diff --git a/src/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py index 950b75f86..7a85d0e03 100644 --- a/src/llama_stack/providers/inline/post_training/common/validator.py +++ b/src/llama_stack/providers/inline/post_training/common/validator.py @@ -12,11 +12,8 @@ from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - DialogType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, DialogType, StringType + from llama_stack.providers.utils.common.data_schema_validator import ( ColumnName, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py index 22ace1ae0..f3f3d8d56 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/post_training.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py @@ -6,11 +6,11 @@ from enum import Enum from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -19,6 +19,7 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) + from llama_stack.providers.inline.post_training.huggingface.config import ( HuggingFacePostTrainingConfig, ) diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py index 39b83a3fd..58a30618c 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py @@ -12,20 +12,20 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + TrainingConfig, +) from peft import LoraConfig from 
transformers import ( AutoTokenizer, ) from trl import SFTConfig, SFTTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py index 11d707df9..f7dc3ebf2 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -11,18 +11,18 @@ from typing import Any import torch from datasets import Dataset +from llama_stack_api import ( + Checkpoint, + DatasetIO, + Datasets, + DPOAlignmentConfig, + TrainingConfig, +) from transformers import ( AutoTokenizer, ) from trl import DPOConfig, DPOTrainer -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DPOAlignmentConfig, - TrainingConfig, -) from llama_stack.log import get_logger from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device diff --git a/src/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py index a930602d0..86c3c3f52 100644 --- a/src/llama_stack/providers/inline/post_training/huggingface/utils.py +++ b/src/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING, Any, Protocol import psutil import torch from datasets import Dataset +from llama_stack_api import Checkpoint, DatasetIO, TrainingConfig from transformers import AutoConfig, AutoModelForCausalLM if TYPE_CHECKING: @@ -34,8 +35,6 @@ class HFAutoModel(Protocol): def save_pretrained(self, save_directory: str | Path) -> None: ... 
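The recipe modules consume the same flat namespace for their typed configs. A trimmed, hypothetical entry point under the new imports (the signature is illustrative, not the recipes' actual one; the body is elided):

```python
from llama_stack_api import (
    Checkpoint,
    DatasetIO,
    Datasets,
    LoraFinetuningConfig,
    TrainingConfig,
)

async def finetune_single_device(
    datasetio_api: DatasetIO,
    datasets_api: Datasets,
    training_config: TrainingConfig,
    algorithm_config: LoraFinetuningConfig,
) -> list[Checkpoint]:
    """Sketch: fetch rows via datasetio_api, fine-tune, return checkpoints."""
    raise NotImplementedError
```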
-from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.post_training import Checkpoint, TrainingConfig from llama_stack.log import get_logger from .config import HuggingFacePostTrainingConfig diff --git a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py index f0fa052a2..1483b8385 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py @@ -13,6 +13,7 @@ from collections.abc import Callable import torch +from llama_stack_api import DatasetFormat from pydantic import BaseModel from torchtune.data._messages import InputOutputToMessages, ShareGPTToMessages from torchtune.models.llama3 import llama3_tokenizer @@ -21,7 +22,6 @@ from torchtune.models.llama3_1 import lora_llama3_1_8b from torchtune.models.llama3_2 import lora_llama3_2_3b from torchtune.modules.transforms import Transform -from llama_stack.apis.post_training import DatasetFormat from llama_stack.models.llama.sku_list import resolve_model from llama_stack.models.llama.sku_types import Model diff --git a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py index 765f6789d..3370d42fa 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/post_training.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py @@ -6,11 +6,11 @@ from enum import Enum from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, Checkpoint, + DatasetIO, + Datasets, DPOAlignmentConfig, JobStatus, ListPostTrainingJobsResponse, @@ -20,6 +20,7 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) + from llama_stack.providers.inline.post_training.torchtune.config import ( TorchtunePostTrainingConfig, ) diff --git a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index c648cdc46..2bf1d0fe7 100644 --- a/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -12,6 +12,17 @@ from pathlib import Path from typing import Any import torch +from llama_stack_api import ( + Checkpoint, + DataConfig, + DatasetIO, + Datasets, + LoraFinetuningConfig, + OptimizerConfig, + PostTrainingMetric, + QATFinetuningConfig, + TrainingConfig, +) from torch import nn from torch.optim import Optimizer from torch.utils.data import DataLoader, DistributedSampler @@ -32,17 +43,6 @@ from torchtune.training.lr_schedulers import get_cosine_schedule_with_warmup from torchtune.training.metric_logging import DiskLogger from tqdm import tqdm -from llama_stack.apis.common.training_types import PostTrainingMetric -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.post_training import ( - Checkpoint, - DataConfig, - LoraFinetuningConfig, - OptimizerConfig, - QATFinetuningConfig, - TrainingConfig, -) from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR from 
llama_stack.core.utils.model_utils import model_local_dir from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py index 7da9ea0d7..80e907c10 100644 --- a/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py +++ b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py @@ -10,15 +10,17 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from codeshield.cs import CodeShieldScanResult -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + ModerationObject, + ModerationObjectResults, + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield + from llama_stack.log import get_logger from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, diff --git a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 6f6346e82..36e4280b9 100644 --- a/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -9,26 +9,27 @@ import uuid from string import Template from typing import Any -from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem -from llama_stack.apis.inference import ( +from llama_stack_api import ( + ImageContentItem, Inference, + ModerationObject, + ModerationObjectResults, OpenAIChatCompletionRequestWithExtraBody, OpenAIMessageParam, OpenAIUserMessageParam, -) -from llama_stack.apis.safety import ( RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, + TextContentItem, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults -from llama_stack.apis.shields import Shield + from llama_stack.core.datatypes import Api from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) diff --git a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index 2015e1150..b4f495f19 100644 --- a/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -7,21 +7,21 @@ from typing import Any import torch -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ShieldStore, ViolationLevel, ) -from llama_stack.apis.safety.safety import ModerationObject -from llama_stack.apis.shields import Shield +from transformers import AutoModelForSequenceClassification, AutoTokenizer + from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.log import get_logger -from llama_stack.providers.datatypes 
import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from .config import PromptGuardConfig, PromptGuardType diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py index b19b68039..326fd9211 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring.py @@ -5,17 +5,19 @@ # the root directory of this source tree. from typing import Any -from llama_stack.apis.datasetio import DatasetIO -from llama_stack.apis.datasets import Datasets -from llama_stack.apis.scoring import ( +from llama_stack_api import ( + DatasetIO, + Datasets, ScoreBatchResponse, ScoreResponse, Scoring, + ScoringFn, + ScoringFnParams, + ScoringFunctionsProtocolPrivate, ScoringResult, ) -from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams + from llama_stack.core.datatypes import Api -from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate from llama_stack.providers.utils.common.data_schema_validator import ( get_valid_schemas, validate_dataset_schema, diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py index b87974d08..93c2627dd 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py @@ -8,8 +8,8 @@ import json import re from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams +from llama_stack_api import ScoringFnParams, ScoringResultRow + from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn from .fn_defs.docvqa import docvqa diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py index 60804330f..382c64d88 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py @@ -6,8 +6,8 @@ from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFnParams +from llama_stack_api import ScoringFnParams, ScoringResultRow + from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn from .fn_defs.equality import equality diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py index aad3dfe26..a7305d13a 100644 --- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py +++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py @@ -4,10 +4,10 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
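Every `fn_defs` module in the basic scoring provider follows one template, so the migrated equality definition reads roughly as below (a reconstruction from the import list in the hunk that follows; exact field values may differ from the real file):

```python
from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
    NumberType,
    ScoringFn,
)

# Hedged reconstruction of a basic scoring-function definition.
equality = ScoringFn(
    identifier="basic::equality",
    description="Returns 1.0 if the generated answer equals the expected answer.",
    provider_id="basic",
    provider_resource_id="equality",
    return_type=NumberType(),
    params=BasicScoringFnParams(
        aggregation_functions=[AggregationFunctionType.accuracy],
    ),
)
```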
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
index aad3dfe26..a7305d13a 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
index 9b24ff791..f7d2f32ae 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
index adca0791d..a2ed1d695 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
index 8b1bf5352..4e2b49a1f 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
index ea04331c9..df0cf52d9 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
@@ -4,9 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
+    NumberType,
     RegexParserScoringFnParams,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
index 9cae66fa6..1f143c4a6 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
index 77f6176e6..4ec85bb09 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
@@ -6,8 +6,8 @@ from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
 
 from .fn_defs.ifeval import (
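The `fn_defs` hunks above only touch imports; for context, a scoring-function definition in this style looks roughly like the sketch below. `identifier` and `params` appear verbatim in this patch (see the `llm_as_judge_base` hunk further down); the remaining field names are assumptions about the `ScoringFn` model, not taken from this diff.

```python
from llama_stack_api import (
    AggregationFunctionType,
    BasicScoringFnParams,
    NumberType,
    ScoringFn,
)

# Hypothetical definition mirroring the fn_defs modules above.
my_subset_match = ScoringFn(
    identifier="basic::my-subset-match",  # naming style borrowed from the "basic" fn_defs
    description="Scores 1.0 when the expected answer is contained in the output.",
    return_type=NumberType(),
    provider_id="basic",
    params=BasicScoringFnParams(aggregation_functions=[AggregationFunctionType.average]),
)
```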
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
index d765959a8..4e9d49e96 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
@@ -5,8 +5,8 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
 
 from ..utils.math_utils import first_answer, normalize_final_answer, try_evaluate_frac, try_evaluate_latex
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
index cb336e303..7f213b38c 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
@@ -6,8 +6,8 @@ import re
 from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams, ScoringFnParamsType
+from llama_stack_api import ScoringFnParams, ScoringFnParamsType, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
 
 from .fn_defs.regex_parser_multiple_choice_answer import (
diff --git a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
index d6e10e6c9..b291924d5 100644
--- a/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
@@ -6,8 +6,8 @@ from typing import Any
 
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
 
 from .fn_defs.subset_of import subset_of
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
index 14810f706..cbab93c74 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
@@ -17,21 +17,22 @@ from autoevals.ragas import (
     ContextRelevancy,
     Faithfulness,
 )
-from pydantic import BaseModel
-
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
     ScoringResultRow,
 )
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+from pydantic import BaseModel
+
 from llama_stack.core.datatypes import Api
 from llama_stack.core.request_headers import NeedsRequestProviderData
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
 from llama_stack.providers.utils.common.data_schema_validator import (
     get_valid_schemas,
     validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
index 4fe07f822..b058305b4 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
index a1995cc4e..d619d38a8 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
index e8fe15259..34354a1fc 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
index d9b129a8b..4092ccc4a 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
index c1d7e855b..2b32b9eec 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
index 01ddd0dd0..4d6547002 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
index 55d89344a..739dfd7bd 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
index c621ecf7f..59ed5949b 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
index 2e85c0c7c..96c36d226 100644
--- a/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
+++ b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     BasicScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
index 9b7628524..aa636d2b3 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
@@ -5,18 +5,20 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import (
+from llama_stack_api import (
+    DatasetIO,
+    Datasets,
+    Inference,
     ScoreBatchResponse,
     ScoreResponse,
     Scoring,
+    ScoringFn,
+    ScoringFnParams,
+    ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
-from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
+
 from llama_stack.core.datatypes import Api
-from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
 from llama_stack.providers.utils.common.data_schema_validator import (
     get_valid_schemas,
     validate_dataset_schema,
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
index 074f1ff46..ed26169a5 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
@@ -4,10 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import (
+from llama_stack_api import (
     AggregationFunctionType,
     LLMAsJudgeScoringFnParams,
+    NumberType,
     ScoringFn,
 )
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
index 205e0bbf3..bffffd878 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
@@ -4,8 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.scoring_functions import LLMAsJudgeScoringFnParams, ScoringFn
+from llama_stack_api import LLMAsJudgeScoringFnParams, NumberType, ScoringFn
 
 llm_as_judge_base = ScoringFn(
     identifier="llm-as-judge::base",
diff --git a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
index fbecb6e20..169a4d8b7 100644
--- a/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
+++ b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
@@ -6,9 +6,8 @@ import re
 from typing import Any
 
-from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
-from llama_stack.apis.scoring import ScoringResultRow
-from llama_stack.apis.scoring_functions import ScoringFnParams
+from llama_stack_api import Inference, OpenAIChatCompletionRequestWithExtraBody, ScoringFnParams, ScoringResultRow
+
 from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
 
 from .fn_defs.llm_as_judge_405b_simpleqa import llm_as_judge_405b_simpleqa
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
index f9a7e7b89..60117dc3d 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import RagToolRuntimeConfig
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index 14cbec49d..f499989cb 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -6,15 +6,16 @@
 
 from jinja2 import Template
-
-from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack.apis.tools.rag_tool import (
+from llama_stack_api import (
     DefaultRAGQueryGeneratorConfig,
+    InterleavedContent,
     LLMRAGQueryGeneratorConfig,
+    OpenAIChatCompletionRequestWithExtraBody,
+    OpenAIUserMessageParam,
     RAGQueryGenerator,
     RAGQueryGeneratorConfig,
 )
+
 from llama_stack.providers.utils.inference.prompt_adapter import (
     interleaved_content_as_str,
 )
diff --git a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
index ab3833936..b0c9fc461 100644
--- a/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -12,34 +12,31 @@ from typing import Any
 
 import httpx
 from fastapi import UploadFile
-from pydantic import TypeAdapter
-
-from llama_stack.apis.common.content_types import (
+from llama_stack_api import (
     URL,
+    Files,
+    Inference,
     InterleavedContent,
     InterleavedContentItem,
-    TextContentItem,
-)
-from llama_stack.apis.files import Files, OpenAIFilePurpose
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.tools import (
     ListToolDefsResponse,
+    OpenAIFilePurpose,
+    QueryChunksResponse,
     RAGDocument,
     RAGQueryConfig,
     RAGQueryResult,
+    TextContentItem,
     ToolDef,
     ToolGroup,
+    ToolGroupsProtocolPrivate,
     ToolInvocationResult,
     ToolRuntime,
-)
-from llama_stack.apis.vector_io import (
-    QueryChunksResponse,
     VectorIO,
     VectorStoreChunkingStrategyStatic,
     VectorStoreChunkingStrategyStaticConfig,
 )
+from pydantic import TypeAdapter
+
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
 from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
 from llama_stack.providers.utils.memory.vector_store import parse_data_url
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
index 575e5ad88..155b8a0cb 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import ChromaVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
index 1798f10de..d955b1d06 100644
--- a/src/llama_stack/providers/inline/vector_io/chroma/config.py
+++ b/src/llama_stack/providers/inline/vector_io/chroma/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
index 24d1f292a..b834589e3 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import FaissVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
index dd7a7aeca..dd433f818 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/config.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 96760b834..abef42499 100644
--- a/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -12,15 +12,22 @@ from typing import Any
 
 import faiss  # type: ignore[import-untyped]
 import numpy as np
+from llama_stack_api import (
+    Chunk,
+    Files,
+    HealthResponse,
+    HealthStatus,
+    Inference,
+    InterleavedContent,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference, InterleavedContent
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, VectorStoresProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
index 7dc9c6a33..2f84769f3 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import MilvusVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
index b333b04ea..08d05c991 100644
--- a/src/llama_stack/providers/inline/vector_io/milvus/config.py
+++ b/src/llama_stack/providers/inline/vector_io/milvus/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
index bef6d50e6..145d19455 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import QdrantVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
index e7ecde7b7..437d643f0 100644
--- a/src/llama_stack/providers/inline/vector_io/qdrant/config.py
+++ b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
@@ -7,10 +7,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel
 
 from llama_stack.core.storage.datatypes import KVStoreReference
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
index df96e927c..e84c299dc 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
@@ -6,7 +6,7 @@ from typing import Any
 
-from llama_stack.providers.datatypes import Api
+from llama_stack_api import Api
 
 from .config import SQLiteVectorIOConfig
diff --git a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 399800d3e..e979ff323 100644
--- a/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -12,15 +12,19 @@ from typing import Any
 
 import numpy as np
 import sqlite_vec  # type: ignore[import-untyped]
+from llama_stack_api import (
+    Chunk,
+    Files,
+    Inference,
+    QueryChunksResponse,
+    VectorIO,
+    VectorStore,
+    VectorStoreNotFoundError,
+    VectorStoresProtocolPrivate,
+)
 from numpy.typing import NDArray
 
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import VectorStoresProtocolPrivate
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.kvstore.api import KVStore
 from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
diff --git a/src/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
index 1845d6f46..bd204cecd 100644
--- a/src/llama_stack/providers/registry/agents.py
+++ b/src/llama_stack/providers/registry/agents.py
@@ -5,11 +5,12 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
 )
+
 from llama_stack.providers.utils.kvstore import kvstore_dependencies
diff --git a/src/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
index a07942486..e11bb8332 100644
--- a/src/llama_stack/providers/registry/batches.py
+++ b/src/llama_stack/providers/registry/batches.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
index a9feb0bac..bfd7ede3c 100644
--- a/src/llama_stack/providers/registry/datasetio.py
+++ b/src/llama_stack/providers/registry/datasetio.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
index 4ef0bb41f..9c8b1eebd 100644
--- a/src/llama_stack/providers/registry/eval.py
+++ b/src/llama_stack/providers/registry/eval.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
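Reviewer note: `HealthResponse` and `HealthStatus` also move into `llama_stack_api` (see the faiss hunk above, and the ollama/vLLM adapters further down). For context, a sketch of the health-reporting shape these providers implement; the class, the probe, and the `message` field are illustrative assumptions based on the imported names, not code from this patch:

```python
from llama_stack_api import HealthResponse, HealthStatus


class ExampleVectorIOAdapter:  # hypothetical provider
    async def health(self) -> HealthResponse:
        try:
            await self._ping_backend()  # stand-in for a real connectivity check
            return HealthResponse(status=HealthStatus.OK)
        except Exception as e:
            return HealthResponse(status=HealthStatus.ERROR, message=str(e))

    async def _ping_backend(self) -> None:
        ...
```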
diff --git a/src/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
index 3f5949ba2..dfc527816 100644
--- a/src/llama_stack/providers/registry/files.py
+++ b/src/llama_stack/providers/registry/files.py
@@ -4,7 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+
 from llama_stack.providers.utils.sqlstore.sqlstore import sql_store_pip_packages
diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
index 3cbfd408b..819e5aff5 100644
--- a/src/llama_stack/providers/registry/inference.py
+++ b/src/llama_stack/providers/registry/inference.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
index 2092e3b2d..a5529b714 100644
--- a/src/llama_stack/providers/registry/post_training.py
+++ b/src/llama_stack/providers/registry/post_training.py
@@ -7,7 +7,7 @@ from typing import cast
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec, RemoteProviderSpec
 
 # We provide two versions of these providers so that distributions can package the appropriate version of torch.
 # The CPU version is used for distributions that don't have GPU support -- they result in smaller container images.
diff --git a/src/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
index b30074398..c9dbbce24 100644
--- a/src/llama_stack/providers/registry/safety.py
+++ b/src/llama_stack/providers/registry/safety.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
diff --git a/src/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
index a4ec54ed2..45c5dbed7 100644
--- a/src/llama_stack/providers/registry/scoring.py
+++ b/src/llama_stack/providers/registry/scoring.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack_api import Api, InlineProviderSpec, ProviderSpec
 
 
 def available_providers() -> list[ProviderSpec]:
diff --git a/src/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
index 39dc7fccd..3f0a83a30 100644
--- a/src/llama_stack/providers/registry/tool_runtime.py
+++ b/src/llama_stack/providers/registry/tool_runtime.py
@@ -5,12 +5,13 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
     RemoteProviderSpec,
 )
+
 from llama_stack.providers.registry.vector_io import DEFAULT_VECTOR_IO_DEPS
diff --git a/src/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
index 55b302751..a00941586 100644
--- a/src/llama_stack/providers/registry/vector_io.py
+++ b/src/llama_stack/providers/registry/vector_io.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 
 
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     Api,
     InlineProviderSpec,
     ProviderSpec,
@@ -244,7 +244,7 @@ Two ranker types are supported:
 Example using RAGQueryConfig with different search modes:
 
 ```python
-from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker
+from llama_stack_api import RAGQueryConfig, RRFRanker, WeightedRanker
 
 # Vector search
 config = RAGQueryConfig(mode="vector", max_chunks=5)
diff --git a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
index a34e354bf..1260ce644 100644
--- a/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
+++ b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
@@ -6,10 +6,8 @@ from typing import Any
 from urllib.parse import parse_qs, urlparse
 
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Dataset
-from llama_stack.providers.datatypes import DatasetsProtocolPrivate
+from llama_stack_api import Dataset, DatasetIO, DatasetsProtocolPrivate, PaginatedResponse
+
 from llama_stack.providers.utils.kvstore import kvstore_impl
 from llama_stack.providers.utils.pagination import paginate_records
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
index f723c92cc..cb674b0d7 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
@@ -7,11 +7,7 @@ from typing import Any
 
 import aiohttp
-
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.apis.common.type_system import ParamType
-from llama_stack.apis.datasets import Dataset
+from llama_stack_api import URL, Dataset, PaginatedResponse, ParamType
 
 from .config import NvidiaDatasetIOConfig
diff --git a/src/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
index 8fc7ffdd3..fbdec0d4d 100644
--- a/src/llama_stack/providers/remote/eval/nvidia/eval.py
+++ b/src/llama_stack/providers/remote/eval/nvidia/eval.py
@@ -6,18 +6,24 @@ from typing import Any
 
 import requests
+from llama_stack_api import (
+    Agents,
+    Benchmark,
+    BenchmarkConfig,
+    BenchmarksProtocolPrivate,
+    DatasetIO,
+    Datasets,
+    Eval,
+    EvaluateResponse,
+    Inference,
+    Job,
+    JobStatus,
+    Scoring,
+    ScoringResult,
+)
 
-from llama_stack.apis.agents import Agents
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.datasetio import DatasetIO
-from llama_stack.apis.datasets import Datasets
-from llama_stack.apis.inference import Inference
-from llama_stack.apis.scoring import Scoring, ScoringResult
-from llama_stack.providers.datatypes import BenchmarksProtocolPrivate
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
-from .....apis.common.job_types import Job, JobStatus
-from .....apis.eval.eval import BenchmarkConfig, Eval, EvaluateResponse
 from .config import NVIDIAEvalConfig
 
 DEFAULT_NAMESPACE = "nvidia"
diff --git a/src/llama_stack/providers/remote/files/openai/files.py b/src/llama_stack/providers/remote/files/openai/files.py
index c5d4194df..bbd630977 100644
--- a/src/llama_stack/providers/remote/files/openai/files.py
+++ b/src/llama_stack/providers/remote/files/openai/files.py
@@ -8,17 +8,17 @@ from datetime import UTC, datetime
 from typing import Annotated, Any
 
 from fastapi import Depends, File, Form, Response, UploadFile
-
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
+
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.providers.utils.files.form_data import parse_expires_after
 from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
diff --git a/src/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
index 76261bdf4..14f1e3852 100644
--- a/src/llama_stack/providers/remote/files/s3/files.py
+++ b/src/llama_stack/providers/remote/files/s3/files.py
@@ -17,16 +17,17 @@ from fastapi import Depends, File, Form, Response, UploadFile
 if TYPE_CHECKING:
     from mypy_boto3_s3.client import S3Client
 
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import (
+from llama_stack_api import (
     ExpiresAfter,
     Files,
     ListOpenAIFileResponse,
     OpenAIFileDeleteResponse,
     OpenAIFileObject,
     OpenAIFilePurpose,
+    Order,
+    ResourceNotFoundError,
 )
+
 from llama_stack.core.datatypes import AccessRule
 from llama_stack.core.id_generation import generate_object_id
 from llama_stack.providers.utils.files.form_data import parse_expires_after
diff --git a/src/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
index 31e6aa12b..7ee4c54e2 100644
--- a/src/llama_stack/providers/remote/inference/anthropic/config.py
+++ b/src/llama_stack/providers/remote/inference/anthropic/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class AnthropicProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
index 7c31df7a6..596f6c234 100644
--- a/src/llama_stack/providers/remote/inference/azure/config.py
+++ b/src/llama_stack/providers/remote/inference/azure/config.py
@@ -7,10 +7,10 @@ import os
 from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, HttpUrl, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class AzureProviderDataValidator(BaseModel):
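Reviewer note: every provider config file in this patch swaps `llama_stack.schema_utils.json_schema_type` for the re-exported `llama_stack_api.json_schema_type`; the decorator is used exactly as before. A minimal config sketch (the class name and fields are illustrative, not from this diff):

```python
from llama_stack_api import json_schema_type
from pydantic import BaseModel, Field


@json_schema_type
class ExampleInferenceConfig(BaseModel):  # hypothetical provider config
    url: str = Field(description="Base URL of the inference endpoint.")
    timeout: int = Field(default=30, description="Request timeout in seconds.")
```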
diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 1bf44b51a..1a9fe533b 100644
--- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -6,9 +6,7 @@ from collections.abc import AsyncIterator, Iterable
 
-from openai import AuthenticationError
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -17,6 +15,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from openai import AuthenticationError
+
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
index d5def9da1..c7f3111f9 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -6,10 +6,11 @@ from urllib.parse import urljoin
 
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import CerebrasImplConfig
diff --git a/src/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
index 9ba773724..a1fd41e2d 100644
--- a/src/llama_stack/providers/remote/inference/cerebras/config.py
+++ b/src/llama_stack/providers/remote/inference/cerebras/config.py
@@ -7,10 +7,10 @@ import os
 from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 DEFAULT_BASE_URL = "https://api.cerebras.ai"
diff --git a/src/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
index 84357f764..4974593d2 100644
--- a/src/llama_stack/providers/remote/inference/databricks/config.py
+++ b/src/llama_stack/providers/remote/inference/databricks/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class DatabricksProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
index 636241383..8b802379f 100644
--- a/src/llama_stack/providers/remote/inference/databricks/databricks.py
+++ b/src/llama_stack/providers/remote/inference/databricks/databricks.py
@@ -7,8 +7,8 @@ from collections.abc import Iterable
 
 from databricks.sdk import WorkspaceClient
+from llama_stack_api import OpenAICompletion, OpenAICompletionRequestWithExtraBody
-
-from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
index 20ba99606..d786655eb 100644
--- a/src/llama_stack/providers/remote/inference/fireworks/config.py
+++ b/src/llama_stack/providers/remote/inference/fireworks/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
index df5da29a2..6c25c005c 100644
--- a/src/llama_stack/providers/remote/inference/gemini/config.py
+++ b/src/llama_stack/providers/remote/inference/gemini/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class GeminiProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
index ee960d13b..79d694f06 100644
--- a/src/llama_stack/providers/remote/inference/gemini/gemini.py
+++ b/src/llama_stack/providers/remote/inference/gemini/gemini.py
@@ -6,12 +6,13 @@ from typing import Any
 
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingData,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import GeminiConfig
diff --git a/src/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
index c1aedca3e..cec327716 100644
--- a/src/llama_stack/providers/remote/inference/groq/config.py
+++ b/src/llama_stack/providers/remote/inference/groq/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class GroqProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
index 4b5750ed4..c16311830 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class LlamaProviderDataValidator(BaseModel):
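Reviewer note: the embeddings types (`OpenAIEmbeddingData`, `OpenAIEmbeddingsRequestWithExtraBody`, `OpenAIEmbeddingsResponse`, `OpenAIEmbeddingUsage`) now also come from `llama_stack_api`. A sketch of a call site; the field names mirror the OpenAI embeddings API, `encoding_format` is the same field the watsonx adapter reads below, and the `openai_embeddings` entry point is an assumption about the mixin rather than something shown in this diff:

```python
from llama_stack_api import OpenAIEmbeddingsRequestWithExtraBody

params = OpenAIEmbeddingsRequestWithExtraBody(
    model="text-embedding-004",  # placeholder model id
    input=["What moved into llama_stack_api?"],
    encoding_format="float",
)
# response: OpenAIEmbeddingsResponse = await adapter.openai_embeddings(params)
```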
diff --git a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 05d6e8cc8..1dea3e3cb 100644
--- a/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -4,12 +4,13 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
     OpenAICompletion,
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
index b4926f33e..b89b2a750 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/__init__.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference import Inference
+from llama_stack_api import Inference
 
 from .config import NVIDIAConfig
diff --git a/src/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
index 618bbe078..6ff98d290 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/config.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/config.py
@@ -7,10 +7,10 @@ import os
 from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class NVIDIAProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
index bc5aa7953..9e4c6f559 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -8,16 +8,15 @@ from collections.abc import Iterable
 
 import aiohttp
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
     RerankData,
     RerankResponse,
 )
-from llama_stack.apis.inference.inference import (
-    OpenAIChatCompletionContentPartImageParam,
-    OpenAIChatCompletionContentPartTextParam,
-)
-from llama_stack.apis.models import Model, ModelType
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/oci/__init__.py b/src/llama_stack/providers/remote/inference/oci/__init__.py
index 280a8c1d2..b7d6125f3 100644
--- a/src/llama_stack/providers/remote/inference/oci/__init__.py
+++ b/src/llama_stack/providers/remote/inference/oci/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference import InferenceProvider
+from llama_stack_api import InferenceProvider
 
 from .config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/oci/config.py b/src/llama_stack/providers/remote/inference/oci/config.py
index 9747b08ea..24b4ad926 100644
--- a/src/llama_stack/providers/remote/inference/oci/config.py
+++ b/src/llama_stack/providers/remote/inference/oci/config.py
@@ -7,10 +7,10 @@ import os
 from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class OCIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/oci/oci.py b/src/llama_stack/providers/remote/inference/oci/oci.py
index 253dcf2b6..36e56cf6c 100644
--- a/src/llama_stack/providers/remote/inference/oci/oci.py
+++ b/src/llama_stack/providers/remote/inference/oci/oci.py
@@ -10,15 +10,15 @@ from typing import Any
 
 import httpx
 import oci
+from llama_stack_api import (
+    ModelType,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+)
 from oci.generative_ai.generative_ai_client import GenerativeAiClient
 from oci.generative_ai.models import ModelCollection
 from openai._base_client import DefaultAsyncHttpxClient
 
-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.models import ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.oci.auth import OciInstancePrincipalAuth, OciUserPrincipalAuth
 from llama_stack.providers.remote.inference.oci.config import OCIConfig
diff --git a/src/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
index 50f36d045..6a471429e 100644
--- a/src/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/src/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -7,15 +7,15 @@ import asyncio
 
-from ollama import AsyncClient as AsyncOllamaClient
-
-from llama_stack.apis.common.errors import UnsupportedModelError
-from llama_stack.apis.models import Model
-from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
+from llama_stack_api import (
     HealthResponse,
     HealthStatus,
+    Model,
+    UnsupportedModelError,
 )
+from ollama import AsyncClient as AsyncOllamaClient
+
+from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
index 36c66bd28..cbb01b2d0 100644
--- a/src/llama_stack/providers/remote/inference/openai/config.py
+++ b/src/llama_stack/providers/remote/inference/openai/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class OpenAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
index eca28a86a..7045dbf2e 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/config.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
index 3c56acfbd..19cf0c5d7 100644
--- a/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
+++ b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
@@ -6,10 +6,9 @@ from collections.abc import AsyncIterator
 
-from openai import AsyncOpenAI
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     Inference,
+    Model,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -18,7 +17,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
+from openai import AsyncOpenAI
+
 from llama_stack.core.request_headers import NeedsRequestProviderData
 
 from .config import PassthroughImplConfig
diff --git a/src/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
index a2a1add97..aaa4230a8 100644
--- a/src/llama_stack/providers/remote/inference/runpod/config.py
+++ b/src/llama_stack/providers/remote/inference/runpod/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class RunpodProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
index a76e941cb..4596b2df5 100644
--- a/src/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/src/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -6,11 +6,12 @@ from collections.abc import AsyncIterator
 
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
 )
+
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import RunpodImplConfig
diff --git a/src/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
index f63210434..6d72e7205 100644
--- a/src/llama_stack/providers/remote/inference/sambanova/config.py
+++ b/src/llama_stack/providers/remote/inference/sambanova/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class SambaNovaProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
index 47952abba..051a2afa3 100644
--- a/src/llama_stack/providers/remote/inference/tgi/config.py
+++ b/src/llama_stack/providers/remote/inference/tgi/config.py
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
index 6ae7b2544..831a26e39 100644
--- a/src/llama_stack/providers/remote/inference/tgi/tgi.py
+++ b/src/llama_stack/providers/remote/inference/tgi/tgi.py
@@ -8,12 +8,12 @@ from collections.abc import Iterable
 
 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
+from pydantic import SecretStr
+
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
index 47392c8e7..96c0538e3 100644
--- a/src/llama_stack/providers/remote/inference/together/config.py
+++ b/src/llama_stack/providers/remote/inference/together/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
index 963b384a0..f1355a760 100644
--- a/src/llama_stack/providers/remote/inference/together/together.py
+++ b/src/llama_stack/providers/remote/inference/together/together.py
@@ -8,15 +8,15 @@ from collections.abc import Iterable
 from typing import Any, cast
 
+from llama_stack_api import (
+    Model,
+    OpenAIEmbeddingsRequestWithExtraBody,
+    OpenAIEmbeddingsResponse,
+    OpenAIEmbeddingUsage,
+)
 from together import AsyncTogether  # type: ignore[import-untyped]
 from together.constants import BASE_URL  # type: ignore[import-untyped]
 
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-)
-from llama_stack.apis.inference.inference import OpenAIEmbeddingUsage
-from llama_stack.apis.models import Model
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/src/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
index 5f2efa894..53e2b3e65 100644
--- a/src/llama_stack/providers/remote/inference/vertexai/config.py
+++ b/src/llama_stack/providers/remote/inference/vertexai/config.py
@@ -6,10 +6,10 @@ from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class VertexAIProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
index e362aece6..23f713961 100644
--- a/src/llama_stack/providers/remote/inference/vllm/config.py
+++ b/src/llama_stack/providers/remote/inference/vllm/config.py
@@ -6,10 +6,10 @@ from pathlib import Path
 
+from llama_stack_api import json_schema_type
 from pydantic import Field, SecretStr, field_validator
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 @json_schema_type
diff --git a/src/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
index fa350ec48..f7938c22c 100644
--- a/src/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/src/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,19 +7,17 @@ from collections.abc import AsyncIterator
 from urllib.parse import urljoin
 
 import httpx
-from pydantic import ConfigDict
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthResponse,
+    HealthStatus,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
     ToolChoice,
 )
+from pydantic import ConfigDict
+
 from llama_stack.log import get_logger
-from llama_stack.providers.datatypes import (
-    HealthResponse,
-    HealthStatus,
-)
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
 from .config import VLLMInferenceAdapterConfig
diff --git a/src/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
index 8d8df13b4..1bba040ef 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/config.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/config.py
@@ -7,10 +7,10 @@ import os
 from typing import Any
 
+from llama_stack_api import json_schema_type
 from pydantic import BaseModel, Field
 
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
-from llama_stack.schema_utils import json_schema_type
 
 
 class WatsonXProviderDataValidator(BaseModel):
diff --git a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
index e71ffe5e1..de23c25d7 100644
--- a/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
+++ b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -9,8 +9,9 @@ from typing import Any
 
 import litellm
 import requests
-
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Model,
+    ModelType,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,8 +21,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
 )
-from llama_stack.apis.models import Model
-from llama_stack.apis.models.models import ModelType
+
 from llama_stack.core.telemetry.tracing import get_current_span
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.watsonx.config import WatsonXConfig
@@ -238,7 +238,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         )
 
         # Convert response to OpenAI format
-        from llama_stack.apis.inference import OpenAIEmbeddingUsage
+        from llama_stack_api import OpenAIEmbeddingUsage
+
         from llama_stack.providers.utils.inference.litellm_openai_mixin import b64_encode_openai_embeddings_response
 
         data = b64_encode_openai_embeddings_response(response.data, params.encoding_format)
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
index
83f20a44e..f998f44ba 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md @@ -128,7 +128,7 @@ client.post_training.job.cancel(job_uuid="your-job-id") #### 1. Register the model ```python -from llama_stack.apis.models import Model, ModelType +from llama_stack_api.models import Model, ModelType client.models.register( model_id="test-example-model@v1", diff --git a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py index d839ffd6f..02c35241b 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/post_training.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py @@ -8,9 +8,7 @@ from datetime import datetime from typing import Any, Literal import aiohttp -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.post_training import ( +from llama_stack_api import ( AlgorithmConfig, DPOAlignmentConfig, JobStatus, @@ -19,6 +17,8 @@ from llama_stack.apis.post_training import ( PostTrainingJobStatusResponse, TrainingConfig, ) +from pydantic import BaseModel, ConfigDict + from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostTrainingConfig from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper diff --git a/src/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py index 162951ff3..78762155d 100644 --- a/src/llama_stack/providers/remote/post_training/nvidia/utils.py +++ b/src/llama_stack/providers/remote/post_training/nvidia/utils.py @@ -7,9 +7,9 @@ import warnings from typing import Any +from llama_stack_api import TrainingConfig from pydantic import BaseModel -from llama_stack.apis.post_training import TrainingConfig from llama_stack.log import get_logger from llama_stack.providers.remote.post_training.nvidia.config import SFTLoRADefaultConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py index 75f96816a..86b93c32e 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py @@ -7,16 +7,17 @@ import json from typing import Any -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.bedrock.client import create_bedrock_client from .config import BedrockSafetyConfig diff --git a/src/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py index 1ca8d95cb..ca28924d4 100644 --- a/src/llama_stack/providers/remote/safety/bedrock/config.py +++ b/src/llama_stack/providers/remote/safety/bedrock/config.py @@ -5,8 +5,9 @@ # the root directory of this source tree. 
+from llama_stack_api import json_schema_type + from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md index af11b2539..f3ec0f1e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/README.md +++ b/src/llama_stack/providers/remote/safety/nvidia/README.md @@ -42,8 +42,8 @@ client.initialize() #### Create a safety shield ```python -from llama_stack.apis.safety import Shield -from llama_stack.apis.inference import Message +from llama_stack_api.safety import Shield +from llama_stack_api.inference import Message # Create a safety shield shield = Shield( diff --git a/src/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py index 1c618f4f4..fc686ae73 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/config.py +++ b/src/llama_stack/providers/remote/safety/nvidia/config.py @@ -6,10 +6,9 @@ import os from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class NVIDIASafetyConfig(BaseModel): diff --git a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py index 236f16207..b3b5090e0 100644 --- a/src/llama_stack/providers/remote/safety/nvidia/nvidia.py +++ b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py @@ -7,12 +7,18 @@ from typing import Any import requests +from llama_stack_api import ( + ModerationObject, + OpenAIMessageParam, + RunShieldResponse, + Safety, + SafetyViolation, + Shield, + ShieldsProtocolPrivate, + ViolationLevel, +) -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ModerationObject, RunShieldResponse, Safety, SafetyViolation, ViolationLevel -from llama_stack.apis.shields import Shield from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import NVIDIASafetyConfig diff --git a/src/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py index 2cde97098..a8e745851 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/config.py +++ b/src/llama_stack/providers/remote/safety/sambanova/config.py @@ -6,10 +6,9 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field, SecretStr -from llama_stack.schema_utils import json_schema_type - class SambaNovaProviderDataValidator(BaseModel): sambanova_api_key: str | None = Field( diff --git a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py index 72359badd..119ebb6ed 100644 --- a/src/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -8,18 +8,18 @@ from typing import Any import litellm import requests - -from llama_stack.apis.inference import OpenAIMessageParam -from llama_stack.apis.safety import ( +from llama_stack_api import ( + OpenAIMessageParam, RunShieldResponse, Safety, SafetyViolation, + Shield, + ShieldsProtocolPrivate, ViolationLevel, ) -from llama_stack.apis.shields import Shield + from llama_stack.core.request_headers import NeedsRequestProviderData from 
llama_stack.log import get_logger -from llama_stack.providers.datatypes import ShieldsProtocolPrivate from .config import SambaNovaSafetyConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index e8ab6dc90..322a8b8e8 100644 --- a/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index 081082add..e5b013dac 100644 --- a/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -7,18 +7,18 @@ from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BraveSearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index d1ad445c4..4f9531b09 100644 --- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -7,17 +7,18 @@ from typing import Any from urllib.parse import urlparse -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.datatypes import Api -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, + Api, ListToolDefsResponse, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools from .config import MCPProviderConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 1b49f8a03..3ba2fa101 100644 --- a/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, 
ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig diff --git a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 9bacfaa1c..5bfaaf82a 100644 --- a/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -8,17 +8,17 @@ import json from typing import Any import httpx - -from llama_stack.apis.common.content_types import URL -from llama_stack.apis.tools import ( +from llama_stack_api import ( + URL, ListToolDefsResponse, ToolDef, ToolGroup, + ToolGroupsProtocolPrivate, ToolInvocationResult, ToolRuntime, ) + from llama_stack.core.request_headers import NeedsRequestProviderData -from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py index e4b77c68d..d774ea643 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import ChromaVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py index 97e2244b8..eca5d349b 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -9,14 +9,19 @@ from typing import Any from urllib.parse import urlparse import chromadb +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py index 209ba90bb..b1e4f9a4a 100644 --- a/src/llama_stack/providers/remote/vector_io/chroma/config.py +++ b/src/llama_stack/providers/remote/vector_io/chroma/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git 
a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py index 526075bb2..1b703d486 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import MilvusVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py index 8ff9e1328..2e2c788c7 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, ConfigDict, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py index 73339b5be..b856bf918 100644 --- a/src/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -8,16 +8,21 @@ import asyncio import os from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from pymilvus import AnnSearchRequest, DataType, Function, FunctionType, MilvusClient, RRFRanker, WeightedRanker -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py index 8086b7650..36018fd95 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import PGVectorVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py index d81e524e4..aeb1c83bb 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/config.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index cf10a0e01..8aa0303b6 100644 --- a/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -8,18 +8,23 @@ import heapq from typing import Any import psycopg2 +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import Json, execute_values from pydantic import BaseModel, TypeAdapter -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py index e9527f101..b5b02fe59 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import QdrantVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py index 01fbcc5cb..8cc4cbb2b 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 7d17c5591..53d6be2b6 100644 --- a/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -9,23 +9,24 @@ import hashlib import uuid from typing import Any +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreChunkingStrategy, + VectorStoreFileObject, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models from qdrant_client.models import PointStruct -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference, InterleavedContent -from llama_stack.apis.vector_io import ( - Chunk, - QueryChunksResponse, - VectorIO, - VectorStoreChunkingStrategy, - VectorStoreFileObject, -) -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py index 12e11d013..47546d459 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.datatypes import Api, ProviderSpec +from llama_stack_api import Api, ProviderSpec from .config import WeaviateVectorIOConfig diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py index 66dbf1fed..19f9679fb 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/config.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/config.py @@ -6,10 +6,10 @@ from typing import Any +from llama_stack_api import json_schema_type from pydantic import BaseModel, Field from llama_stack.core.storage.datatypes import KVStoreReference -from llama_stack.schema_utils import json_schema_type @json_schema_type diff --git a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index d200662da..c72666f63 100644 --- a/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -8,19 +8,23 @@ from typing import Any import weaviate import weaviate.classes as wvc +from llama_stack_api import ( + Chunk, + Files, + Inference, + InterleavedContent, + QueryChunksResponse, + VectorIO, + VectorStore, + VectorStoreNotFoundError, + VectorStoresProtocolPrivate, +) from numpy.typing import NDArray from weaviate.classes.init import Auth from weaviate.classes.query import Filter, HybridFusion -from llama_stack.apis.common.content_types import InterleavedContent -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files -from llama_stack.apis.inference import Inference -from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.apis.vector_stores import VectorStore from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger -from llama_stack.providers.datatypes import VectorStoresProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin diff --git a/src/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py index b0305104f..7ef245779 100644 --- a/src/llama_stack/providers/utils/common/data_schema_validator.py +++ b/src/llama_stack/providers/utils/common/data_schema_validator.py @@ -7,11 +7,8 @@ from enum import Enum from typing import Any -from llama_stack.apis.common.type_system import ( - ChatCompletionInputType, - CompletionInputType, - StringType, -) +from llama_stack_api import ChatCompletionInputType, CompletionInputType, StringType + from llama_stack.core.datatypes import Api diff --git a/src/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py index 3d8fb6d85..21afbec2b 100644 --- a/src/llama_stack/providers/utils/files/form_data.py +++ b/src/llama_stack/providers/utils/files/form_data.py @@ -7,10 +7,9 @@ import json from fastapi import Request +from llama_stack_api import ExpiresAfter from pydantic import BaseModel, ValidationError -from llama_stack.apis.files import ExpiresAfter - async def parse_pydantic_from_form[T: BaseModel](request: Request, field_name: str, model_class: type[T]) -> T | None: """ diff --git a/src/llama_stack/providers/utils/inference/embedding_mixin.py 
b/src/llama_stack/providers/utils/inference/embedding_mixin.py index bab495eef..f7e5c711b 100644 --- a/src/llama_stack/providers/utils/inference/embedding_mixin.py +++ b/src/llama_stack/providers/utils/inference/embedding_mixin.py @@ -17,7 +17,7 @@ from llama_stack.log import get_logger if TYPE_CHECKING: from sentence_transformers import SentenceTransformer -from llama_stack.apis.inference import ( +from llama_stack_api import ( ModelStore, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index a3a28aec0..3c707dd01 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -6,15 +6,15 @@ import asyncio from typing import Any -from sqlalchemy.exc import IntegrityError - -from llama_stack.apis.inference import ( +from llama_stack_api import ( ListOpenAIChatCompletionResponse, OpenAIChatCompletion, OpenAICompletionWithInputMessages, OpenAIMessageParam, Order, ) +from sqlalchemy.exc import IntegrityError + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import InferenceStoreReference, StorageBackendType from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py index a793c499e..4f468725b 100644 --- a/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -9,8 +9,7 @@ import struct from collections.abc import AsyncIterator import litellm - -from llama_stack.apis.inference import ( +from llama_stack_api import ( InferenceProvider, OpenAIChatCompletion, OpenAIChatCompletionChunk, @@ -22,6 +21,7 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, ) + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry diff --git a/src/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py index 8a120b698..e7ca5ab74 100644 --- a/src/llama_stack/providers/utils/inference/model_registry.py +++ b/src/llama_stack/providers/utils/inference/model_registry.py @@ -6,12 +6,10 @@ from typing import Any +from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, UnsupportedModelError from pydantic import BaseModel, Field, SecretStr -from llama_stack.apis.common.errors import UnsupportedModelError -from llama_stack.apis.models import ModelType from llama_stack.log import get_logger -from llama_stack.providers.datatypes import Model, ModelsProtocolPrivate from llama_stack.providers.utils.inference import ( ALL_HUGGINGFACE_REPOS_TO_MODEL_DESCRIPTOR, ) diff --git a/src/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py index c2e6829e0..c97e42274 100644 --- a/src/llama_stack/providers/utils/inference/openai_compat.py +++ b/src/llama_stack/providers/utils/inference/openai_compat.py @@ -20,25 +20,23 @@ except ImportError: from openai.types.chat.chat_completion_message_tool_call import ( ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall, ) +from llama_stack_api import ( + URL, + 
GreedySamplingStrategy, + ImageContentItem, + JsonSchemaResponseFormat, + OpenAIResponseFormatParam, + SamplingParams, + TextContentItem, + TopKSamplingStrategy, + TopPSamplingStrategy, + _URLOrData, +) from openai.types.chat import ( ChatCompletionMessageToolCall, ) from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - ImageContentItem, - TextContentItem, - _URLOrData, -) -from llama_stack.apis.inference import ( - GreedySamplingStrategy, - JsonSchemaResponseFormat, - OpenAIResponseFormatParam, - SamplingParams, - TopKSamplingStrategy, - TopPSamplingStrategy, -) from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( BuiltinTool, diff --git a/src/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py index 09059da09..c05873df5 100644 --- a/src/llama_stack/providers/utils/inference/openai_mixin.py +++ b/src/llama_stack/providers/utils/inference/openai_mixin.py @@ -10,11 +10,9 @@ from abc import ABC, abstractmethod from collections.abc import AsyncIterator, Iterable from typing import Any -from openai import AsyncOpenAI -from pydantic import BaseModel, ConfigDict - -from llama_stack.apis.inference import ( +from llama_stack_api import ( Model, + ModelType, OpenAIChatCompletion, OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, @@ -26,7 +24,9 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingUsage, OpenAIMessageParam, ) -from llama_stack.apis.models import ModelType +from openai import AsyncOpenAI +from pydantic import BaseModel, ConfigDict + from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig diff --git a/src/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py index 35a7b3484..ea01a34e9 100644 --- a/src/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/src/llama_stack/providers/utils/inference/prompt_adapter.py @@ -12,16 +12,11 @@ import re from typing import Any import httpx -from PIL import Image as PIL_Image - -from llama_stack.apis.common.content_types import ( +from llama_stack_api import ( + CompletionRequest, ImageContentItem, InterleavedContent, InterleavedContentItem, - TextContentItem, -) -from llama_stack.apis.inference import ( - CompletionRequest, OpenAIAssistantMessageParam, OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, @@ -32,8 +27,11 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, ResponseFormat, ResponseFormatType, + TextContentItem, ToolChoice, ) +from PIL import Image as PIL_Image + from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( RawContent, diff --git a/src/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py index 6a8b0a7cf..895268a4f 100644 --- a/src/llama_stack/providers/utils/kvstore/sqlite/config.py +++ b/src/llama_stack/providers/utils/kvstore/sqlite/config.py @@ -4,10 +4,9 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_stack_api import json_schema_type from pydantic import BaseModel, Field -from llama_stack.schema_utils import json_schema_type - @json_schema_type class SqliteControlPlaneConfig(BaseModel): diff --git a/src/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py index 4c40056f3..6786293c6 100644 --- a/src/llama_stack/providers/utils/memory/file_utils.py +++ b/src/llama_stack/providers/utils/memory/file_utils.py @@ -8,7 +8,7 @@ import base64 import mimetypes import os -from llama_stack.apis.common.content_types import URL +from llama_stack_api import URL def data_url_from_file(file_path: str) -> URL: diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 853245598..68d1c11e5 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -13,16 +13,15 @@ from abc import ABC, abstractmethod from typing import Annotated, Any from fastapi import Body -from pydantic import TypeAdapter - -from llama_stack.apis.common.errors import VectorStoreNotFoundError -from llama_stack.apis.files import Files, OpenAIFileObject -from llama_stack.apis.vector_io import ( +from llama_stack_api import ( Chunk, + Files, OpenAICreateVectorStoreFileBatchRequestWithExtraBody, OpenAICreateVectorStoreRequestWithExtraBody, + OpenAIFileObject, QueryChunksResponse, SearchRankingOptions, + VectorStore, VectorStoreChunkingStrategy, VectorStoreChunkingStrategyAuto, VectorStoreChunkingStrategyStatic, @@ -39,11 +38,13 @@ from llama_stack.apis.vector_io import ( VectorStoreFileStatus, VectorStoreListFilesResponse, VectorStoreListResponse, + VectorStoreNotFoundError, VectorStoreObject, VectorStoreSearchResponse, VectorStoreSearchResponsePage, ) -from llama_stack.apis.vector_stores import VectorStore +from pydantic import TypeAdapter + from llama_stack.core.id_generation import generate_object_id from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/src/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py index 99f875227..37ac79039 100644 --- a/src/llama_stack/providers/utils/memory/vector_store.py +++ b/src/llama_stack/providers/utils/memory/vector_store.py @@ -14,20 +14,22 @@ from urllib.parse import unquote import httpx import numpy as np +from llama_stack_api import ( + URL, + Api, + Chunk, + ChunkMetadata, + InterleavedContent, + OpenAIEmbeddingsRequestWithExtraBody, + QueryChunksResponse, + RAGDocument, + VectorStore, +) from numpy.typing import NDArray from pydantic import BaseModel -from llama_stack.apis.common.content_types import ( - URL, - InterleavedContent, -) -from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody -from llama_stack.apis.tools import RAGDocument -from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse -from llama_stack.apis.vector_stores import VectorStore from llama_stack.log import get_logger from llama_stack.models.llama.llama3.tokenizer import Tokenizer -from llama_stack.providers.datatypes import Api from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, ) diff --git a/src/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py index 033022491..d1d9e36c5 100644 --- a/src/llama_stack/providers/utils/pagination.py +++ 
b/src/llama_stack/providers/utils/pagination.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.apis.common.responses import PaginatedResponse +from llama_stack_api import PaginatedResponse def paginate_records( diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index fdca8ddee..c7dfed15a 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -4,18 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.apis.agents import ( - Order, -) -from llama_stack.apis.agents.openai_responses import ( +from llama_stack_api import ( ListOpenAIResponseInputItem, ListOpenAIResponseObject, OpenAIDeleteResponseObject, + OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIResponseObjectWithInput, + Order, ) -from llama_stack.apis.inference import OpenAIMessageParam + from llama_stack.core.datatypes import AccessRule from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py index cff9a112f..aa6fe7248 100644 --- a/src/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/src/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -6,8 +6,7 @@ import statistics from typing import Any -from llama_stack.apis.scoring import ScoringResultRow -from llama_stack.apis.scoring_functions import AggregationFunctionType +from llama_stack_api import AggregationFunctionType, ScoringResultRow def aggregate_accuracy(scoring_results: list[ScoringResultRow]) -> dict[str, Any]: diff --git a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py index 2fae177b7..d16c75263 100644 --- a/src/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -6,8 +6,8 @@ from abc import ABC, abstractmethod from typing import Any -from llama_stack.apis.scoring import ScoringFnParams, ScoringResultRow -from llama_stack.apis.scoring_functions import ScoringFn +from llama_stack_api import ScoringFn, ScoringFnParams, ScoringResultRow + from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metrics diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index bcd224234..033a00edc 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -8,10 +8,9 @@ from collections.abc import Mapping, Sequence from enum import Enum from typing import Any, Literal, Protocol +from llama_stack_api import PaginatedResponse from pydantic import BaseModel -from llama_stack.apis.common.responses import PaginatedResponse - class ColumnType(Enum): INTEGER = "INTEGER" diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index cfc3131f4..263f5e69f 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -6,6 +6,7 @@ from collections.abc import Mapping, Sequence from typing import Any, Literal, cast +from llama_stack_api import 
PaginatedResponse from sqlalchemy import ( JSON, Boolean, @@ -26,7 +27,6 @@ from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine from sqlalchemy.ext.asyncio.engine import AsyncEngine from sqlalchemy.sql.elements import ColumnElement -from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.core.storage.datatypes import SqlAlchemySqlStoreConfig from llama_stack.log import get_logger diff --git a/src/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py index 573054e25..9558dcde1 100644 --- a/src/llama_stack/providers/utils/tools/mcp.py +++ b/src/llama_stack/providers/utils/tools/mcp.py @@ -10,17 +10,20 @@ from enum import Enum from typing import Any, cast import httpx +from llama_stack_api import ( + ImageContentItem, + InterleavedContentItem, + ListToolDefsResponse, + TextContentItem, + ToolDef, + ToolInvocationResult, + _URLOrData, +) from mcp import ClientSession, McpError from mcp import types as mcp_types from mcp.client.sse import sse_client from mcp.client.streamable_http import streamablehttp_client -from llama_stack.apis.common.content_types import ImageContentItem, InterleavedContentItem, TextContentItem, _URLOrData -from llama_stack.apis.tools import ( - ListToolDefsResponse, - ToolDef, - ToolInvocationResult, -) from llama_stack.core.datatypes import AuthenticationRequiredError from llama_stack.log import get_logger from llama_stack.providers.utils.tools.ttl_dict import TTLDict diff --git a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py index e97a9d8fb..9c399b7bf 100644 --- a/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py +++ b/tests/external/llama-stack-api-weather/src/llama_stack_api_weather/weather.py @@ -6,9 +6,7 @@ from typing import Protocol -from llama_stack.apis.version import LLAMA_STACK_API_V1 -from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec -from llama_stack.schema_utils import webmethod +from llama_stack_api import LLAMA_STACK_API_V1, Api, ProviderSpec, RemoteProviderSpec, webmethod def available_providers() -> list[ProviderSpec]: diff --git a/tests/integration/batches/conftest.py b/tests/integration/batches/conftest.py index 3ab8df3d9..b9c0ac916 100644 --- a/tests/integration/batches/conftest.py +++ b/tests/integration/batches/conftest.py @@ -13,8 +13,7 @@ from contextlib import contextmanager from io import BytesIO import pytest - -from llama_stack.apis.files import OpenAIFilePurpose +from llama_stack_api import OpenAIFilePurpose class BatchHelper: diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index d9e8dd501..61878ac4c 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -9,8 +9,8 @@ from unittest.mock import patch import pytest import requests +from llama_stack_api import OpenAIFilePurpose -from llama_stack.apis.files import OpenAIFilePurpose from llama_stack.core.datatypes import User purpose = OpenAIFilePurpose.ASSISTANTS diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py index 99aa75395..d007b57d6 100644 --- a/tests/integration/inference/test_provider_data_routing.py +++ b/tests/integration/inference/test_provider_data_routing.py @@ -15,14 +15,14 @@ that enables routing based on provider_data alone. 
from unittest.mock import AsyncMock, patch import pytest - -from llama_stack.apis.datatypes import Api -from llama_stack.apis.inference.inference import ( +from llama_stack_api import ( + Api, OpenAIAssistantMessageParam, OpenAIChatCompletion, OpenAIChatCompletionUsage, OpenAIChoice, ) + from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.core.telemetry.telemetry import MetricEvent diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index b5be71c7c..ff6925b58 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -9,8 +9,7 @@ import time import uuid import pytest - -from llama_stack.apis.post_training import ( +from llama_stack_api import ( DataConfig, DatasetFormat, DPOAlignmentConfig, @@ -18,6 +17,7 @@ from llama_stack.apis.post_training import ( LoraFinetuningConfig, TrainingConfig, ) + from llama_stack.log import get_logger # Configure logging diff --git a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json index 7ec2ac931..4e80e1cdd 100644 --- a/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json +++ b/tests/integration/responses/recordings/42c357284497af596ae6c9341b0c189daa31e88b25d0381a985f24203b7a5a38.json @@ -10,7 +10,7 @@ }, "response": { "body": { - "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult", + "__type__": "llama_stack_api.tools.ToolInvocationResult", "__data__": { "content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. 
* For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. ## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}",
         "error_message": null,
diff --git a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json
index a6c31dc72..a8e1e8611 100644
--- a/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json
+++ b/tests/integration/responses/recordings/54aa690e31b5c33a0488a5d7403393e5712917253462292829b37b9320d6df82.json
@@ -10,7 +10,7 @@
   },
   "response": {
     "body": {
-      "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+      "__type__": "llama_stack_api.tools.ToolInvocationResult",
       "__data__": {
         "content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}",
         "error_message": null,
diff --git a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json
index b92c67940..dd7884012 100644
--- a/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json
+++ b/tests/integration/responses/recordings/77ad6e42c34823ac51a784cfe4fa0ee18d09bd413189a7c03b24bf3871e3d8d7.json
@@ -10,7 +10,7 @@
   },
   "response": {
     "body": {
-      "__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+      "__type__": "llama_stack_api.tools.ToolInvocationResult",
       "__data__": {
         "content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}",
         "error_message": null,
diff --git a/tests/integration/safety/test_llama_guard.py b/tests/integration/safety/test_llama_guard.py
index 5a73bb044..99b4982f0 100644
--- a/tests/integration/safety/test_llama_guard.py
+++ b/tests/integration/safety/test_llama_guard.py
@@ -12,8 +12,8 @@ import warnings
 from collections.abc import Generator
 
 import pytest
+from llama_stack_api import ViolationLevel
 
-from llama_stack.apis.safety import ViolationLevel
 from llama_stack.models.llama.sku_types import CoreModelId
 
 # Llama Guard models available for text and vision shields
diff --git a/tests/integration/safety/test_safety.py b/tests/integration/safety/test_safety.py
index 6337abc9c..6a926f1d5 100644
--- a/tests/integration/safety/test_safety.py
+++ b/tests/integration/safety/test_safety.py
@@ -7,8 +7,7 @@ import base64
 import mimetypes
 
 import pytest
-
-from llama_stack.apis.safety import ViolationLevel
+from llama_stack_api import ViolationLevel
 
 CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"}
diff --git a/tests/integration/safety/test_vision_safety.py b/tests/integration/safety/test_vision_safety.py
index 7b3779e9e..b85a23263 100644
--- a/tests/integration/safety/test_vision_safety.py
+++ b/tests/integration/safety/test_vision_safety.py
@@ -9,8 +9,7 @@ import mimetypes
 import os
 
 import pytest
-
-from llama_stack.apis.safety import ViolationLevel
+from llama_stack_api import ViolationLevel
 
 VISION_SHIELD_ENABLED_PROVIDERS = {"together"}
diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py
index 4d532ed87..1b1b6ef28 100644
--- a/tests/integration/tool_runtime/test_registration.py
+++ b/tests/integration/tool_runtime/test_registration.py
@@ -7,8 +7,8 @@ import re
 
 import pytest
+from llama_stack_api import ToolGroupNotFoundError
 
-from llama_stack.apis.common.errors import ToolGroupNotFoundError
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 1043d4903..c65dfecac 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -8,11 +8,10 @@ import time
 from io import BytesIO
 
 import pytest
+from llama_stack_api import Chunk, ExpiresAfter
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
 
-from llama_stack.apis.files import ExpiresAfter
-from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger
 
@@ -646,7 +645,7 @@ def test_openai_vector_store_attach_file(
 ):
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     compat_client = compat_client_with_empty_stores
 
@@ -710,7 +709,7 @@ def test_openai_vector_store_attach_files_on_creation(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create some files and attach them to the vector store
     valid_file_ids = []
@@ -775,7 +774,7 @@ def test_openai_vector_store_list_files(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -867,7 +866,7 @@ def test_openai_vector_store_retrieve_file_contents(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -928,7 +927,7 @@ def test_openai_vector_store_delete_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -994,7 +993,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1046,7 +1045,7 @@ def test_openai_vector_store_update_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
     compat_client = compat_client_with_empty_stores
 
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1103,7 +1102,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
     This test confirms that client.vector_stores.create() creates a unique ID
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter
 
     compat_client = compat_client_with_empty_stores
diff --git a/tests/integration/vector_io/test_vector_io.py b/tests/integration/vector_io/test_vector_io.py
index 1b2099069..acaa44bcb 100644
--- a/tests/integration/vector_io/test_vector_io.py
+++ b/tests/integration/vector_io/test_vector_io.py
@@ -5,8 +5,7 @@
 # the root directory of this source tree.
 
 import pytest
-
-from llama_stack.apis.vector_io import Chunk
+from llama_stack_api import Chunk
 
 from ..conftest import vector_provider_wrapper
diff --git a/tests/unit/conversations/test_api_models.py b/tests/unit/conversations/test_api_models.py
index 8416cba0b..f8576f076 100644
--- a/tests/unit/conversations/test_api_models.py
+++ b/tests/unit/conversations/test_api_models.py
@@ -5,11 +5,7 @@
 # the root directory of this source tree.
 
-from llama_stack.apis.conversations.conversations import (
-    Conversation,
-    ConversationItem,
-    ConversationItemList,
-)
+from llama_stack_api import Conversation, ConversationItem, ConversationItemList
 
 
 def test_conversation_model_defaults():
diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py
index 3f0175831..2f942eb9c 100644
--- a/tests/unit/conversations/test_conversations.py
+++ b/tests/unit/conversations/test_conversations.py
@@ -8,14 +8,11 @@ import tempfile
 from pathlib import Path
 
 import pytest
+from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage
 from openai.types.conversations.conversation import Conversation as OpenAIConversation
 from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem
 from pydantic import TypeAdapter
 
-from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInputMessageContentText,
-    OpenAIResponseMessage,
-)
 from llama_stack.core.conversations.conversations import (
     ConversationServiceConfig,
     ConversationServiceImpl,
diff --git a/tests/unit/core/routers/test_safety_router.py b/tests/unit/core/routers/test_safety_router.py
index bf195ff33..7e465513e 100644
--- a/tests/unit/core/routers/test_safety_router.py
+++ b/tests/unit/core/routers/test_safety_router.py
@@ -6,8 +6,8 @@
 
 from unittest.mock import AsyncMock
 
-from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack.apis.shields import ListShieldsResponse, Shield
+from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield
+
 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter
diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py
index f9bd84a37..071fbe6e7 100644
--- a/tests/unit/core/routers/test_vector_io.py
+++ b/tests/unit/core/routers/test_vector_io.py
@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock, Mock
 
 import pytest
+from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody
 
-from llama_stack.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
 from llama_stack.core.routers.vector_io import VectorIORouter
diff --git a/tests/unit/core/test_stack_validation.py b/tests/unit/core/test_stack_validation.py
index d28803006..acb31e1c9 100644
--- a/tests/unit/core/test_stack_validation.py
+++ b/tests/unit/core/test_stack_validation.py
@@ -9,12 +9,10 @@
 from unittest.mock import AsyncMock
 
 import pytest
+from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield
 
-from llama_stack.apis.models import ListModelsResponse, Model, ModelType
-from llama_stack.apis.shields import ListShieldsResponse, Shield
 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
-from llama_stack.providers.datatypes import Api
 
 
 class TestVectorStoresValidation:
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index bfa93d913..274fe6219 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -9,15 +9,22 @@
 from unittest.mock import AsyncMock
 
 import pytest
+from llama_stack_api import (
+    URL,
+    Api,
+    Dataset,
+    DatasetPurpose,
+    ListToolDefsResponse,
+    Model,
+    ModelNotFoundError,
+    ModelType,
+    NumberType,
+    Shield,
+    ToolDef,
+    ToolGroup,
+    URIDataSource,
+)
 
-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model, ModelType
-from llama_stack.apis.shields.shields import Shield
-from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup
 from llama_stack.core.datatypes import RegistryEntrySource
 from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
 from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable
diff --git a/tests/unit/distribution/test_api_recordings.py b/tests/unit/distribution/test_api_recordings.py
index 2b7ce5c4e..f66b57df8 100644
--- a/tests/unit/distribution/test_api_recordings.py
+++ b/tests/unit/distribution/test_api_recordings.py
@@ -9,10 +9,9 @@ from pathlib import Path
 from unittest.mock import patch
 
 import pytest
-from openai import AsyncOpenAI
 
 # Import the real Pydantic response types instead of using Mocks
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
@@ -20,6 +19,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
+from openai import AsyncOpenAI
+
 from llama_stack.testing.api_recorder import (
     APIRecordingMode,
     ResponseStorage,
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index 11f55cfdb..a27455e24 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -9,6 +9,7 @@ from unittest.mock import patch
 
 import pytest
 import yaml
+from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError
 
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -22,7 +23,6 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.datatypes import ProviderSpec
 
 
 class SampleConfig(BaseModel):
@@ -312,7 +312,7 @@ pip_packages:
         """Test loading an external provider from a module (success path)."""
         from types import SimpleNamespace
 
-        from llama_stack.providers.datatypes import Api, ProviderSpec
+        from llama_stack_api import Api, ProviderSpec
 
         # Simulate a provider module with get_provider_spec
         fake_spec = ProviderSpec(
@@ -395,8 +395,9 @@ pip_packages:
 
     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
+        from llama_stack_api import Api
+
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
-        from llama_stack.providers.datatypes import Api
 
         # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
         build_config = BuildConfig(
@@ -456,8 +457,9 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace
 
+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec
 
         fake_spec = ProviderSpec(
             api=Api.inference,
@@ -593,8 +595,9 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace
 
+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec
 
         spec1 = ProviderSpec(
             api=Api.inference,
@@ -641,8 +644,9 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace
 
+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec
 
         spec1 = ProviderSpec(
             api=Api.inference,
@@ -689,8 +693,9 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace
 
+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec
 
         # Module returns both inline and remote variants
         spec1 = ProviderSpec(
@@ -828,8 +833,9 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace
 
+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec
 
         inference_spec = ProviderSpec(
             api=Api.inference,
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index 426e2cf64..080d1ddbe 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,10 +6,8 @@
 
 import pytest
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError
 
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import OpenAIFilePurpose
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
 from llama_stack.providers.inline.files.localfs import (
diff --git a/tests/unit/providers/batches/test_reference.py b/tests/unit/providers/batches/test_reference.py
index 89cb1af9d..3c93a578d 100644
--- a/tests/unit/providers/batches/test_reference.py
+++ b/tests/unit/providers/batches/test_reference.py
@@ -58,9 +58,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
-
-from llama_stack.apis.batches import BatchObject
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError
 
 
 class TestReferenceBatchesImpl:
diff --git a/tests/unit/providers/batches/test_reference_idempotency.py b/tests/unit/providers/batches/test_reference_idempotency.py
index e6cb29b9b..4cd5d962d 100644
--- a/tests/unit/providers/batches/test_reference_idempotency.py
+++ b/tests/unit/providers/batches/test_reference_idempotency.py
@@ -43,8 +43,7 @@ Key Behaviors Tested:
 import asyncio
 
 import pytest
-
-from llama_stack.apis.common.errors import ConflictError
+from llama_stack_api import ConflictError
 
 
 class TestReferenceBatchesIdempotency:
diff --git a/tests/unit/providers/files/test_s3_files.py b/tests/unit/providers/files/test_s3_files.py
index 92a45a9f2..ae63c1a78 100644
--- a/tests/unit/providers/files/test_s3_files.py
+++ b/tests/unit/providers/files/test_s3_files.py
@@ -8,9 +8,7 @@ from unittest.mock import patch
 
 import pytest
 from botocore.exceptions import ClientError
-
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
 
 class TestS3FilesImpl:
@@ -228,7 +226,7 @@ class TestS3FilesImpl:
 
         mock_now.return_value = 0
 
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_expired_file"
         uploaded = await s3_provider.openai_upload_file(
@@ -260,7 +258,7 @@ class TestS3FilesImpl:
 
     async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
         """Unsupported anchor value should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_unsupported_expires_after_anchor"
 
@@ -273,7 +271,7 @@ class TestS3FilesImpl:
 
     async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
         """Non-integer seconds in expires_after should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         sample_text_file.filename = "test_nonint_expires_after_seconds"
 
@@ -286,7 +284,7 @@ class TestS3FilesImpl:
 
     async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
         """Seconds outside allowed range should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter
 
         with pytest.raises(ValueError, match="greater than or equal to 3600"):
             await s3_provider.openai_upload_file(
diff --git a/tests/unit/providers/files/test_s3_files_auth.py b/tests/unit/providers/files/test_s3_files_auth.py
index 6097f2808..873db4e27 100644
--- a/tests/unit/providers/files/test_s3_files_auth.py
+++ b/tests/unit/providers/files/test_s3_files_auth.py
@@ -7,9 +7,8 @@
 from unittest.mock import patch
 
 import pytest
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError
 
-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.files import OpenAIFilePurpose
 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl
diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py
index fdd07c032..b3eecc558 100644
--- a/tests/unit/providers/inference/test_bedrock_adapter.py
+++ b/tests/unit/providers/inference/test_bedrock_adapter.py
@@ -8,9 +8,9 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError
 
-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index ffd45798e..e2a5455b7 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -9,8 +9,9 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
 
 import pytest
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthStatus,
+    Model,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,10 +21,9 @@ from llama_stack.apis.inference import (
     OpenAICompletionRequestWithExtraBody,
     ToolChoice,
 )
-from llama_stack.apis.models import Model
+
 from llama_stack.core.routers.inference import InferenceRouter
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
 from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
diff --git a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
index fff29928c..36d2b86a9 100644
--- a/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
+++ b/tests/unit/providers/inline/agents/meta_reference/responses/test_streaming.py
@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock
 
 import pytest
+from llama_stack_api import ToolDef
 
-from llama_stack.apis.tools import ToolDef
 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
 )
diff --git a/tests/unit/providers/nvidia/test_datastore.py b/tests/unit/providers/nvidia/test_datastore.py
index b59636f7b..0d9f1cc35 100644
--- a/tests/unit/providers/nvidia/test_datastore.py
+++ b/tests/unit/providers/nvidia/test_datastore.py
@@ -8,9 +8,8 @@ import os
 from unittest.mock import patch
 
 import pytest
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource
 
-from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack.apis.resource import ResourceType
 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
diff --git a/tests/unit/providers/nvidia/test_eval.py b/tests/unit/providers/nvidia/test_eval.py
index 86e005b76..c41379801 100644
--- a/tests/unit/providers/nvidia/test_eval.py
+++ b/tests/unit/providers/nvidia/test_eval.py
@@ -8,12 +8,18 @@ import os
 from unittest.mock import MagicMock, patch
 
 import pytest
+from llama_stack_api import (
+    Benchmark,
+    BenchmarkConfig,
+    EvaluateResponse,
+    Job,
+    JobStatus,
+    ModelCandidate,
+    ResourceType,
+    SamplingParams,
+    TopPSamplingStrategy,
+)
 
-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.common.job_types import Job, JobStatus
-from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
-from llama_stack.apis.inference.inference import TopPSamplingStrategy
-from llama_stack.apis.resource import ResourceType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl
diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py
index ad381da26..ba68a7abe 100644
--- a/tests/unit/providers/nvidia/test_parameters.py
+++ b/tests/unit/providers/nvidia/test_parameters.py
@@ -9,8 +9,7 @@ import warnings
 from unittest.mock import patch
 
 import pytest
-
-from llama_stack.apis.post_training.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     EfficiencyConfig,
@@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import (
     OptimizerType,
     TrainingConfig,
 )
+
 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
     NvidiaPostTrainingAdapter,
diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py
index 2793b5f44..8b313abcd 100644
--- a/tests/unit/providers/nvidia/test_rerank_inference.py
+++ b/tests/unit/providers/nvidia/test_rerank_inference.py
@@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import aiohttp
 import pytest
+from llama_stack_api import ModelType
 
-from llama_stack.apis.models import ModelType
 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/tests/unit/providers/nvidia/test_safety.py b/tests/unit/providers/nvidia/test_safety.py
index 622302630..ea6254841 100644
--- a/tests/unit/providers/nvidia/test_safety.py
+++ b/tests/unit/providers/nvidia/test_safety.py
@@ -9,14 +9,15 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
+    ResourceType,
+    RunShieldResponse,
+    Shield,
+    ViolationLevel,
 )
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.safety import RunShieldResponse, ViolationLevel
-from llama_stack.apis.shields import Shield
+
 from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
 from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter
diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
index 91148605d..4d0ce695b 100644
--- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
+++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py
@@ -9,8 +9,7 @@ import warnings
 from unittest.mock import patch
 
 import pytest
-
-from llama_stack.apis.post_training.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     LoraFinetuningConfig,
@@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import (
     QATFinetuningConfig,
     TrainingConfig,
 )
+
 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
     ListNvidiaPostTrainingJobs,
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
index 684fcf262..df7453712 100644
--- a/tests/unit/providers/test_bedrock.py
+++ b/tests/unit/providers/test_bedrock.py
@@ -7,7 +7,8 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch
 
-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
+
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py
index 0b5ea078b..b9b59bb79 100644
--- a/tests/unit/providers/utils/inference/test_openai_mixin.py
+++ b/tests/unit/providers/utils/inference/test_openai_mixin.py
@@ -10,10 +10,9 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
 
 import pytest
+from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack.apis.models import ModelType
 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
diff --git a/tests/unit/providers/utils/inference/test_prompt_adapter.py b/tests/unit/providers/utils/inference/test_prompt_adapter.py
index 62c8db74d..a7c9289d7 100644
--- a/tests/unit/providers/utils/inference/test_prompt_adapter.py
+++ b/tests/unit/providers/utils/inference/test_prompt_adapter.py
@@ -4,10 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.inference import (
-    OpenAIAssistantMessageParam,
-    OpenAIUserMessageParam,
-)
+from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam
+
 from llama_stack.models.llama.datatypes import RawTextItem
 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_openai_message_to_raw_message,
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 590bdd1d2..00db5795a 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -7,9 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
+from llama_stack_api import URL, RAGDocument, TextContentItem
 
-from llama_stack.apis.common.content_types import URL, TextContentItem
-from llama_stack.apis.tools import RAGDocument
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 04e75aa82..4a85cf8b8 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -34,8 +34,8 @@
 #
 
 import pytest
+from llama_stack_api import Model
 
-from llama_stack.apis.models import Model
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 5e56ea417..216e9b8ea 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -9,9 +9,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore
 
-from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index 44bcd0cfd..0d5c1399f 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,11 +9,8 @@ from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore
 
-from llama_stack.apis.files import Files
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
     FaissIndex,
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 5ee62cd63..17a99ce1c 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,8 +8,8 @@ import asyncio
 
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, QueryChunksResponse
 
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
     SQLiteVecVectorIOAdapter,
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 121623e1b..7ba40eefb 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -10,17 +10,17 @@ from unittest.mock import AsyncMock, patch
 
 import numpy as np
 import pytest
-
-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.vector_io import (
+from llama_stack_api import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
+    VectorStore,
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
+    VectorStoreNotFoundError,
 )
-from llama_stack.apis.vector_stores import VectorStore
+
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
 
 # This test is a unit test for the inline VectorIO providers. This should only contain
@@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
 
 async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     """Ensure no KeyError when document_id is missing or in different places."""
-    from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+    from llama_stack_api import Chunk, ChunkMetadata
 
     fake_index = AsyncMock()
     vector_io_adapter.cache["db1"] = fake_index
@@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
 
 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack.apis.vector_io import Chunk
+    from llama_stack_api import Chunk
 
     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index 1ca753a44..678b76fbd 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -4,7 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack_api import Chunk, ChunkMetadata
+
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 
 # This test is a unit test for the chunk_utils.py helpers. This should only contain
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index 8563d0d53..e3f5e46d7 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -7,13 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig
 
-from llama_stack.apis.tools.rag_tool import RAGQueryConfig
-from llama_stack.apis.vector_io import (
-    Chunk,
-    ChunkMetadata,
-    QueryChunksResponse,
-)
 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 1f73fdb8e..23c12dcab 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -12,13 +12,8 @@ from unittest.mock import AsyncMock, MagicMock
 
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument
 
-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsRequestWithExtraBody,
-)
-from llama_stack.apis.tools import RAGDocument
-from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
     URL,
     VectorStoreWithIndex,
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index d4c9786d1..01f486ab2 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -6,9 +6,8 @@
 
 import pytest
+from llama_stack_api import Model, VectorStore
 
-from llama_stack.apis.inference import Model
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
@@ -304,7 +303,8 @@ async def test_double_registration_different_objects(disk_dist_registry):
 
 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack.apis.models import ModelType
+    from llama_stack_api import ModelType
+
     from llama_stack.core.datatypes import ModelWithOwner
 
     model1 = ModelWithOwner(
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index 09b9a3cfb..2827f60b9 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,7 +5,8 @@
 # the root directory of this source tree.
 
 
-from llama_stack.apis.models import ModelType
+from llama_stack_api import ModelType
+
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index ea4f9b8b2..1df933d4d 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -8,10 +8,9 @@ from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 import yaml
+from llama_stack_api import Api, ModelType
 from pydantic import TypeAdapter, ValidationError
 
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import ModelType
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py
index cc9397f07..57a552514 100644
--- a/tests/unit/server/test_auth.py
+++ b/tests/unit/server/test_auth.py
@@ -144,7 +144,7 @@ def middleware_with_mocks(mock_auth_endpoint):
     middleware = AuthenticationMiddleware(mock_app, auth_config, {})
 
     # Mock the route_impls to simulate finding routes with required scopes
-    from llama_stack.schema_utils import WebMethod
+    from llama_stack_api import WebMethod
 
     routes = {
         ("POST", "/test/scoped"): WebMethod(route="/test/scoped", method="POST", required_scope="test.read"),
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index b44f12f7e..071178f96 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,9 +9,9 @@ import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock
 
+from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec
 from pydantic import BaseModel, Field
 
-from llama_stack.apis.inference import Inference
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
 from llama_stack.core.resolver import resolve_impls
 from llama_stack.core.routers.inference import InferenceRouter
@@ -25,7 +25,6 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index 0303a6ded..fdaf9022b 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -9,8 +9,8 @@ import logging # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
+from llama_stack_api import PaginatedResponse
 
-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
diff --git a/tests/unit/tools/test_tools_json_schema.py b/tests/unit/tools/test_tools_json_schema.py
index 8fe3103bc..79e0b6e28 100644
--- a/tests/unit/tools/test_tools_json_schema.py
+++ b/tests/unit/tools/test_tools_json_schema.py
@@ -9,9 +9,9 @@ Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
 """
 
+from llama_stack_api import ToolDef
 from pydantic import ValidationError
 
-from llama_stack.apis.tools import ToolDef
 from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index d2de1c759..4da20b125 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -7,14 +7,14 @@ import time
 
 import pytest
-
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
     OpenAIUserMessageParam,
     Order,
 )
+
 from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py
index 34cff3d3f..1119a93d8 100644
--- a/tests/unit/utils/responses/test_responses_store.py
+++ b/tests/unit/utils/responses/test_responses_store.py
@@ -9,13 +9,8 @@ from tempfile import TemporaryDirectory
 from uuid import uuid4
 
 import pytest
+from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order
 
-from llama_stack.apis.agents import Order
-from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInput,
-    OpenAIResponseObject,
-)
-from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
 from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
@@ -46,7 +41,7 @@ def create_test_response_object(
 
 def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput:
     """Helper to create a test response input."""
-    from llama_stack.apis.agents.openai_responses import OpenAIResponseMessage
+    from llama_stack_api import OpenAIResponseMessage
 
     return OpenAIResponseMessage(
         id=input_id,
diff --git a/uv.lock b/uv.lock
index f1808f005..ddf8c1cd4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1945,6 +1945,7 @@ dependencies = [
     { name = "httpx" },
     { name = "jinja2" },
     { name = "jsonschema" },
+    { name = "llama-stack-api" },
    { name = "openai" },
     { name = "opentelemetry-exporter-otlp-proto-http" },
     { name = "opentelemetry-sdk" },
@@ -2094,6 +2095,7 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
+    { name = "llama-stack-api", editable = "src/llama-stack-api" },
     { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
@@ -2125,7 +2127,7 @@ dev = [
     { name = "black" },
     { name = "mypy" },
     { name = "nbval" },
-    { name = "pre-commit" },
+    { name = "pre-commit", specifier = ">=4.4.0" },
     { name = "pytest", specifier = ">=8.4" },
     { name = "pytest-asyncio", specifier = ">=1.0" },
     { name = "pytest-cov" },
@@ -2226,6 +2228,25 @@ unit = [
     { name = "together" },
 ]
 
+[[package]]
+name = "llama-stack-api"
+version = "0.1.0"
+source = { editable = "src/llama-stack-api" }
+dependencies = [
+    { name = "jsonschema" },
+    { name = "opentelemetry-exporter-otlp-proto-http" },
+    { name = "opentelemetry-sdk" },
+    { name = "pydantic" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "jsonschema" },
+    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
+    { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
+    { name = "pydantic", specifier = ">=2.11.9" },
+]
+
 [[package]]
 name = "llama-stack-client"
 version = "0.3.0"
@@ -3403,7 +3424,7 @@ wheels = [
 
 [[package]]
 name = "pre-commit"
-version = "4.2.0"
+version = "4.4.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cfgv" },
@@ -3412,9 +3433,9 @@ dependencies = [
     { name = "pyyaml" },
     { name = "virtualenv" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a6/49/7845c2d7bf6474efd8e27905b51b11e6ce411708c91e829b93f324de9929/pre_commit-4.4.0.tar.gz", hash = "sha256:f0233ebab440e9f17cabbb558706eb173d19ace965c68cdce2c081042b4fab15", size = 197501, upload-time = "2025-11-08T21:12:11.607Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" },
+    { url = "https://files.pythonhosted.org/packages/27/11/574fe7d13acf30bfd0a8dd7fa1647040f2b8064f13f43e8c963b1e65093b/pre_commit-4.4.0-py2.py3-none-any.whl", hash = "sha256:b35ea52957cbf83dcc5d8ee636cbead8624e3a15fbfa61a370e42158ac8a5813", size = 226049, upload-time = "2025-11-08T21:12:10.228Z" },
 ]
 
 [[package]]