feat: refactor llama-stack-api structure

move llama_stack_api.apis... to the top-level llama_stack_api package.

merge the provider datatypes (llama_stack_api.providers.datatypes) and the existing apis.datatypes into a common llama_stack_api.datatypes module.

update all usages of these packages throughout LLS (Llama Stack).
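
for example, imports migrate like this (a representative before/after assembled from the hunks below):

    # before
    from llama_stack_api.apis.inference import OpenAIChatCompletion
    from llama_stack_api.providers.datatypes import Api, ProviderSpec

    # after
    from llama_stack_api.inference import OpenAIChatCompletion
    from llama_stack_api.datatypes import Api, ProviderSpec

note that nested module paths are also flattened (e.g. apis.inference.inference -> inference, apis.agents.openai_responses -> openai_responses), so this is not a pure prefix rewrite.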

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Charlie Doern 2025-11-12 15:59:34 -05:00
parent d6b915ce0a
commit b7480e9c88
296 changed files with 906 additions and 1109 deletions

@@ -6,9 +6,9 @@
from typing import Protocol
from llama_stack_api.apis.version import LLAMA_STACK_API_V1
from llama_stack_api.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
from llama_stack_api.datatypes import Api, ProviderSpec, RemoteProviderSpec
from llama_stack_api.schema_utils import webmethod
from llama_stack_api.version import LLAMA_STACK_API_V1
def available_providers() -> list[ProviderSpec]:

@@ -13,7 +13,7 @@ from contextlib import contextmanager
from io import BytesIO
import pytest
from llama_stack_api.apis.files import OpenAIFilePurpose
from llama_stack_api.files import OpenAIFilePurpose
class BatchHelper:

@@ -9,7 +9,7 @@ from unittest.mock import patch
import pytest
import requests
from llama_stack_api.apis.files import OpenAIFilePurpose
from llama_stack_api.files import OpenAIFilePurpose
from llama_stack.core.datatypes import User

@@ -15,8 +15,8 @@ that enables routing based on provider_data alone.
from unittest.mock import AsyncMock, patch
import pytest
from llama_stack_api.apis.datatypes import Api
from llama_stack_api.apis.inference.inference import (
from llama_stack_api.datatypes import Api
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionUsage,

@@ -9,7 +9,7 @@ import time
import uuid
import pytest
from llama_stack_api.apis.post_training import (
from llama_stack_api.post_training import (
DataConfig,
DatasetFormat,
DPOAlignmentConfig,

@@ -10,7 +10,7 @@
},
"response": {
"body": {
"__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult",
"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. 
## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}",
"error_message": null,

@@ -10,7 +10,7 @@
},
"response": {
"body": {
"__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult",
"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}",
"error_message": null,

@@ -10,7 +10,7 @@
},
"response": {
"body": {
"__type__": "llama_stack_api.apis.tools.tools.ToolInvocationResult",
"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}",
"error_message": null,

@@ -12,7 +12,7 @@ import warnings
from collections.abc import Generator
import pytest
from llama_stack_api.apis.safety import ViolationLevel
from llama_stack_api.safety import ViolationLevel
from llama_stack.models.llama.sku_types import CoreModelId

@@ -7,7 +7,7 @@ import base64
import mimetypes
import pytest
from llama_stack_api.apis.safety import ViolationLevel
from llama_stack_api.safety import ViolationLevel
CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"}

@@ -9,7 +9,7 @@ import mimetypes
import os
import pytest
from llama_stack_api.apis.safety import ViolationLevel
from llama_stack_api.safety import ViolationLevel
VISION_SHIELD_ENABLED_PROVIDERS = {"together"}

@@ -7,7 +7,7 @@
import re
import pytest
from llama_stack_api.apis.common.errors import ToolGroupNotFoundError
from llama_stack_api.common.errors import ToolGroupNotFoundError
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server

@@ -8,8 +8,8 @@ import time
from io import BytesIO
import pytest
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.apis.vector_io import Chunk
from llama_stack_api.files import ExpiresAfter
from llama_stack_api.vector_io import Chunk
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError
@@ -646,7 +646,7 @@ def test_openai_vector_store_attach_file(
):
"""Test OpenAI vector store attach file."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
compat_client = compat_client_with_empty_stores
@@ -710,7 +710,7 @@ def test_openai_vector_store_attach_files_on_creation(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create some files and attach them to the vector store
valid_file_ids = []
@@ -775,7 +775,7 @@ def test_openai_vector_store_list_files(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
@@ -867,7 +867,7 @@ def test_openai_vector_store_retrieve_file_contents(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
@@ -928,7 +928,7 @@ def test_openai_vector_store_delete_file(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
@@ -994,7 +994,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
@@ -1046,7 +1046,7 @@ def test_openai_vector_store_update_file(
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
# Create a vector store
vector_store = compat_client.vector_stores.create(
@@ -1103,7 +1103,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
This test confirms that client.vector_stores.create() creates a unique ID
"""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
compat_client = compat_client_with_empty_stores

@@ -5,7 +5,7 @@
# the root directory of this source tree.
import pytest
from llama_stack_api.apis.vector_io import Chunk
from llama_stack_api.vector_io import Chunk
from ..conftest import vector_provider_wrapper

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack_api.apis.conversations.conversations import (
from llama_stack_api.conversations import (
Conversation,
ConversationItem,
ConversationItemList,

@@ -8,7 +8,7 @@ import tempfile
from pathlib import Path
import pytest
from llama_stack_api.apis.agents.openai_responses import (
from llama_stack_api.openai_responses import (
OpenAIResponseInputMessageContentText,
OpenAIResponseMessage,
)

@@ -6,8 +6,8 @@
from unittest.mock import AsyncMock
from llama_stack_api.apis.safety.safety import ModerationObject, ModerationObjectResults
from llama_stack_api.apis.shields import ListShieldsResponse, Shield
from llama_stack_api.safety import ModerationObject, ModerationObjectResults
from llama_stack_api.shields import ListShieldsResponse, Shield
from llama_stack.core.datatypes import SafetyConfig
from llama_stack.core.routers.safety import SafetyRouter

@@ -7,7 +7,7 @@
from unittest.mock import AsyncMock, Mock
import pytest
from llama_stack_api.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
from llama_stack_api.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
from llama_stack.core.routers.vector_io import VectorIORouter

@@ -9,9 +9,9 @@
from unittest.mock import AsyncMock
import pytest
from llama_stack_api.apis.models import ListModelsResponse, Model, ModelType
from llama_stack_api.apis.shields import ListShieldsResponse, Shield
from llama_stack_api.providers.datatypes import Api
from llama_stack_api.datatypes import Api
from llama_stack_api.models import ListModelsResponse, Model, ModelType
from llama_stack_api.shields import ListShieldsResponse, Shield
from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config

@@ -9,14 +9,14 @@
from unittest.mock import AsyncMock
import pytest
from llama_stack_api.apis.common.content_types import URL
from llama_stack_api.apis.common.errors import ModelNotFoundError
from llama_stack_api.apis.common.type_system import NumberType
from llama_stack_api.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack_api.apis.datatypes import Api
from llama_stack_api.apis.models import Model, ModelType
from llama_stack_api.apis.shields.shields import Shield
from llama_stack_api.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup
from llama_stack_api.common.content_types import URL
from llama_stack_api.common.errors import ModelNotFoundError
from llama_stack_api.common.type_system import NumberType
from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack_api.datatypes import Api
from llama_stack_api.models import Model, ModelType
from llama_stack_api.shields import Shield
from llama_stack_api.tools import ListToolDefsResponse, ToolDef, ToolGroup
from llama_stack.core.datatypes import RegistryEntrySource
from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable

@@ -11,7 +11,7 @@ from unittest.mock import patch
import pytest
# Import the real Pydantic response types instead of using Mocks
from llama_stack_api.apis.inference import (
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,

@@ -9,7 +9,7 @@ from unittest.mock import patch
import pytest
import yaml
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from pydantic import BaseModel, Field, ValidationError
from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -312,7 +312,7 @@ pip_packages:
"""Test loading an external provider from a module (success path)."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import Api, ProviderSpec
from llama_stack_api.datatypes import Api, ProviderSpec
# Simulate a provider module with get_provider_spec
fake_spec = ProviderSpec(
@@ -395,7 +395,7 @@ pip_packages:
def test_external_provider_from_module_building(self, mock_providers):
"""Test loading an external provider from a module during build (building=True, partial spec)."""
from llama_stack_api.providers.datatypes import Api
from llama_stack_api.datatypes import Api
from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
@@ -457,7 +457,7 @@ class TestGetExternalProvidersFromModule:
"""Test provider with module containing version spec (e.g., package==1.0.0)."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from llama_stack.core.distribution import get_external_providers_from_module
@@ -595,7 +595,7 @@ class TestGetExternalProvidersFromModule:
"""Test when get_provider_spec returns a list of specs."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from llama_stack.core.distribution import get_external_providers_from_module
@@ -644,7 +644,7 @@ class TestGetExternalProvidersFromModule:
"""Test that list return filters specs by provider_type."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from llama_stack.core.distribution import get_external_providers_from_module
@@ -693,7 +693,7 @@ class TestGetExternalProvidersFromModule:
"""Test that list return adds multiple different provider_types when config requests them."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from llama_stack.core.distribution import get_external_providers_from_module
@@ -833,7 +833,7 @@ class TestGetExternalProvidersFromModule:
"""Test multiple APIs with providers."""
from types import SimpleNamespace
from llama_stack_api.providers.datatypes import ProviderSpec
from llama_stack_api.datatypes import ProviderSpec
from llama_stack.core.distribution import get_external_providers_from_module

@@ -6,9 +6,9 @@
import pytest
from llama_stack_api.apis.common.errors import ResourceNotFoundError
from llama_stack_api.apis.common.responses import Order
from llama_stack_api.apis.files import OpenAIFilePurpose
from llama_stack_api.common.errors import ResourceNotFoundError
from llama_stack_api.common.responses import Order
from llama_stack_api.files import OpenAIFilePurpose
from llama_stack.core.access_control.access_control import default_policy
from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference

@@ -58,8 +58,8 @@ import json
from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack_api.apis.batches import BatchObject
from llama_stack_api.apis.common.errors import ConflictError, ResourceNotFoundError
from llama_stack_api.batches import BatchObject
from llama_stack_api.common.errors import ConflictError, ResourceNotFoundError
class TestReferenceBatchesImpl:

@@ -43,7 +43,7 @@ Key Behaviors Tested:
import asyncio
import pytest
from llama_stack_api.apis.common.errors import ConflictError
from llama_stack_api.common.errors import ConflictError
class TestReferenceBatchesIdempotency:

@@ -8,8 +8,8 @@ from unittest.mock import patch
import pytest
from botocore.exceptions import ClientError
from llama_stack_api.apis.common.errors import ResourceNotFoundError
from llama_stack_api.apis.files import OpenAIFilePurpose
from llama_stack_api.common.errors import ResourceNotFoundError
from llama_stack_api.files import OpenAIFilePurpose
class TestS3FilesImpl:
@@ -227,7 +227,7 @@ class TestS3FilesImpl:
mock_now.return_value = 0
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
sample_text_file.filename = "test_expired_file"
uploaded = await s3_provider.openai_upload_file(
@@ -259,7 +259,7 @@ class TestS3FilesImpl:
async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
"""Unsupported anchor value should raise ValueError."""
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
sample_text_file.filename = "test_unsupported_expires_after_anchor"
@@ -272,7 +272,7 @@ class TestS3FilesImpl:
async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
"""Non-integer seconds in expires_after should raise ValueError."""
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
sample_text_file.filename = "test_nonint_expires_after_seconds"
@@ -285,7 +285,7 @@ class TestS3FilesImpl:
async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
"""Seconds outside allowed range should raise ValueError."""
from llama_stack_api.apis.files import ExpiresAfter
from llama_stack_api.files import ExpiresAfter
with pytest.raises(ValueError, match="greater than or equal to 3600"):
await s3_provider.openai_upload_file(

@@ -7,8 +7,8 @@
from unittest.mock import patch
import pytest
from llama_stack_api.apis.common.errors import ResourceNotFoundError
from llama_stack_api.apis.files import OpenAIFilePurpose
from llama_stack_api.common.errors import ResourceNotFoundError
from llama_stack_api.files import OpenAIFilePurpose
from llama_stack.core.datatypes import User
from llama_stack.providers.remote.files.s3.files import S3FilesImpl

@@ -8,7 +8,7 @@ from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack_api.apis.inference import OpenAIChatCompletionRequestWithExtraBody
from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
from openai import AuthenticationError
from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter

@@ -9,7 +9,8 @@ import time
from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
import pytest
from llama_stack_api.apis.inference import (
from llama_stack_api.datatypes import HealthStatus
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChatCompletionRequestWithExtraBody,
@@ -19,8 +20,7 @@ from llama_stack_api.apis.inference import (
OpenAICompletionRequestWithExtraBody,
ToolChoice,
)
from llama_stack_api.apis.models import Model
from llama_stack_api.providers.datatypes import HealthStatus
from llama_stack_api.models import Model
from llama_stack.core.routers.inference import InferenceRouter
from llama_stack.core.routing_tables.models import ModelsRoutingTable

@@ -7,7 +7,7 @@
from unittest.mock import AsyncMock
import pytest
from llama_stack_api.apis.tools import ToolDef
from llama_stack_api.tools import ToolDef
from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
convert_tooldef_to_chat_tool,

@@ -8,8 +8,8 @@ import os
from unittest.mock import patch
import pytest
from llama_stack_api.apis.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack_api.apis.resource import ResourceType
from llama_stack_api.datasets import Dataset, DatasetPurpose, URIDataSource
from llama_stack_api.resource import ResourceType
from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter

@@ -8,11 +8,11 @@ import os
from unittest.mock import MagicMock, patch
import pytest
from llama_stack_api.apis.benchmarks import Benchmark
from llama_stack_api.apis.common.job_types import Job, JobStatus
from llama_stack_api.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
from llama_stack_api.apis.inference.inference import TopPSamplingStrategy
from llama_stack_api.apis.resource import ResourceType
from llama_stack_api.benchmarks import Benchmark
from llama_stack_api.common.job_types import Job, JobStatus
from llama_stack_api.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
from llama_stack_api.inference import TopPSamplingStrategy
from llama_stack_api.resource import ResourceType
from llama_stack.models.llama.sku_types import CoreModelId
from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig

@@ -9,7 +9,7 @@ import warnings
from unittest.mock import patch
import pytest
from llama_stack_api.apis.post_training.post_training import (
from llama_stack_api.post_training import (
DataConfig,
DatasetFormat,
EfficiencyConfig,

@@ -8,7 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import aiohttp
import pytest
from llama_stack_api.apis.models import ModelType
from llama_stack_api.models import ModelType
from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter

@@ -9,13 +9,13 @@ from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from llama_stack_api.apis.inference import (
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIUserMessageParam,
)
from llama_stack_api.apis.resource import ResourceType
from llama_stack_api.apis.safety import RunShieldResponse, ViolationLevel
from llama_stack_api.apis.shields import Shield
from llama_stack_api.resource import ResourceType
from llama_stack_api.safety import RunShieldResponse, ViolationLevel
from llama_stack_api.shields import Shield
from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter

@@ -9,7 +9,7 @@ import warnings
from unittest.mock import patch
import pytest
from llama_stack_api.apis.post_training.post_training import (
from llama_stack_api.post_training import (
DataConfig,
DatasetFormat,
LoraFinetuningConfig,

@@ -7,7 +7,7 @@
from types import SimpleNamespace
from unittest.mock import AsyncMock, PropertyMock, patch
from llama_stack_api.apis.inference import OpenAIChatCompletionRequestWithExtraBody
from llama_stack_api.inference import OpenAIChatCompletionRequestWithExtraBody
from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig

@@ -10,8 +10,8 @@ from typing import Any
from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
import pytest
from llama_stack_api.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
from llama_stack_api.apis.models import ModelType
from llama_stack_api.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
from llama_stack_api.models import ModelType
from pydantic import BaseModel, Field
from llama_stack.core.request_headers import request_provider_data_context

@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api.apis.inference import (
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIUserMessageParam,
)

@@ -7,8 +7,8 @@
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from llama_stack_api.apis.common.content_types import URL, TextContentItem
from llama_stack_api.apis.tools import RAGDocument
from llama_stack_api.common.content_types import URL, TextContentItem
from llama_stack_api.rag_tool import RAGDocument
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc

@@ -34,7 +34,7 @@
#
import pytest
from llama_stack_api.apis.models import Model
from llama_stack_api.models import Model
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry

@@ -9,8 +9,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
import numpy as np
import pytest
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack_api.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
from llama_stack_api.vector_stores import VectorStore
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig

@@ -9,10 +9,10 @@ from unittest.mock import MagicMock, patch
import numpy as np
import pytest
from llama_stack_api.apis.files import Files
from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack_api.providers.datatypes import HealthStatus
from llama_stack_api.datatypes import HealthStatus
from llama_stack_api.files import Files
from llama_stack_api.vector_io import Chunk, QueryChunksResponse
from llama_stack_api.vector_stores import VectorStore
from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
from llama_stack.providers.inline.vector_io.faiss.faiss import (

@@ -8,7 +8,7 @@ import asyncio
import numpy as np
import pytest
from llama_stack_api.apis.vector_io import Chunk, QueryChunksResponse
from llama_stack_api.vector_io import Chunk, QueryChunksResponse
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
SQLiteVecIndex,

@@ -10,8 +10,8 @@ from unittest.mock import AsyncMock, patch
import numpy as np
import pytest
from llama_stack_api.apis.common.errors import VectorStoreNotFoundError
from llama_stack_api.apis.vector_io import (
from llama_stack_api.common.errors import VectorStoreNotFoundError
from llama_stack_api.vector_io import (
Chunk,
OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
OpenAICreateVectorStoreRequestWithExtraBody,
@@ -19,7 +19,7 @@ from llama_stack_api.apis.vector_io import (
VectorStoreChunkingStrategyAuto,
VectorStoreFileObject,
)
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack_api.vector_stores import VectorStore
from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX
@@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
"""Ensure no KeyError when document_id is missing or in different places."""
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
from llama_stack_api.vector_io import Chunk, ChunkMetadata
fake_index = AsyncMock()
vector_io_adapter.cache["db1"] = fake_index
@@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
async def test_document_id_with_invalid_type_raises_error():
"""Ensure TypeError is raised when document_id is not a string."""
from llama_stack_api.apis.vector_io import Chunk
from llama_stack_api.vector_io import Chunk
# Integer document_id should raise TypeError
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack_api.apis.vector_io import Chunk, ChunkMetadata
from llama_stack_api.vector_io import Chunk, ChunkMetadata
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

@@ -7,8 +7,8 @@
from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack_api.apis.tools.rag_tool import RAGQueryConfig
from llama_stack_api.apis.vector_io import (
from llama_stack_api.rag_tool import RAGQueryConfig
from llama_stack_api.vector_io import (
Chunk,
ChunkMetadata,
QueryChunksResponse,

@@ -12,12 +12,12 @@ from unittest.mock import AsyncMock, MagicMock
import numpy as np
import pytest
from llama_stack_api.apis.inference.inference import (
from llama_stack_api.inference import (
OpenAIEmbeddingData,
OpenAIEmbeddingsRequestWithExtraBody,
)
from llama_stack_api.apis.tools import RAGDocument
from llama_stack_api.apis.vector_io import Chunk
from llama_stack_api.rag_tool import RAGDocument
from llama_stack_api.vector_io import Chunk
from llama_stack.providers.utils.memory.vector_store import (
URL,

@@ -6,8 +6,8 @@
import pytest
from llama_stack_api.apis.inference import Model
from llama_stack_api.apis.vector_stores import VectorStore
from llama_stack_api.inference import Model
from llama_stack_api.vector_stores import VectorStore
from llama_stack.core.datatypes import VectorStoreWithOwner
from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
@@ -304,7 +304,7 @@ async def test_double_registration_different_objects(disk_dist_registry):
async def test_double_registration_with_cache(cached_disk_dist_registry):
"""Test double registration behavior with caching enabled."""
from llama_stack_api.apis.models import ModelType
from llama_stack_api.models import ModelType
from llama_stack.core.datatypes import ModelWithOwner

@@ -5,7 +5,7 @@
# the root directory of this source tree.
from llama_stack_api.apis.models import ModelType
from llama_stack_api.models import ModelType
from llama_stack.core.datatypes import ModelWithOwner, User
from llama_stack.core.store.registry import CachedDiskDistributionRegistry

@@ -8,8 +8,8 @@ from unittest.mock import MagicMock, Mock, patch
import pytest
import yaml
from llama_stack_api.apis.datatypes import Api
from llama_stack_api.apis.models import ModelType
from llama_stack_api.datatypes import Api
from llama_stack_api.models import ModelType
from pydantic import TypeAdapter, ValidationError
from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed

@@ -9,8 +9,8 @@ import sys
from typing import Any, Protocol
from unittest.mock import AsyncMock, MagicMock
from llama_stack_api.apis.inference import Inference
from llama_stack_api.providers.datatypes import InlineProviderSpec, ProviderSpec
from llama_stack_api.datatypes import InlineProviderSpec, ProviderSpec
from llama_stack_api.inference import Inference
from pydantic import BaseModel, Field
from llama_stack.core.datatypes import Api, Provider, StackRunConfig

@@ -9,7 +9,7 @@ import logging # allow-direct-logging
from unittest.mock import AsyncMock, MagicMock
import pytest
from llama_stack_api.apis.common.responses import PaginatedResponse
from llama_stack_api.common.responses import PaginatedResponse
from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator

@@ -9,7 +9,7 @@ Unit tests for JSON Schema-based tool definitions.
Tests the new input_schema and output_schema fields.
"""
from llama_stack_api.apis.tools import ToolDef
from llama_stack_api.tools import ToolDef
from pydantic import ValidationError
from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition

@@ -7,7 +7,7 @@
import time
import pytest
from llama_stack_api.apis.inference import (
from llama_stack_api.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,

@@ -9,12 +9,12 @@ from tempfile import TemporaryDirectory
from uuid import uuid4
import pytest
from llama_stack_api.apis.agents import Order
from llama_stack_api.apis.agents.openai_responses import (
from llama_stack_api.agents import Order
from llama_stack_api.inference import OpenAIMessageParam, OpenAIUserMessageParam
from llama_stack_api.openai_responses import (
OpenAIResponseInput,
OpenAIResponseObject,
)
from llama_stack_api.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
from llama_stack.providers.utils.responses.responses_store import ResponsesStore
@@ -46,7 +46,7 @@ def create_test_response_object(
def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput:
"""Helper to create a test response input."""
from llama_stack_api.apis.agents.openai_responses import OpenAIResponseMessage
from llama_stack_api.openai_responses import OpenAIResponseMessage
return OpenAIResponseMessage(
id=input_id,