Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-03 18:00:36 +00:00.
feat: split API and provider specs into separate llama-stack-api pkg (#3895)
# What does this PR do?

Extract API definitions and provider specifications into a standalone llama-stack-api package that can be published to PyPI independently of the main llama-stack server.

See: https://github.com/llamastack/llama-stack/pull/2978 and https://github.com/llamastack/llama-stack/pull/2978#issuecomment-3145115942

Motivation

External providers currently import from llama-stack, which overrides the installed version and causes dependency conflicts. This separation allows external providers to:

- Install only the type definitions they need, without server dependencies
- Avoid version conflicts with the installed llama-stack package
- Be versioned and released independently

This lets us re-enable the external provider module tests that were previously blocked by these import conflicts.

Changes

- Created the llama-stack-api package with minimal dependencies (pydantic, jsonschema)
- Moved the APIs, provider datatypes, strong_typing, and schema_utils
- Updated all imports from llama_stack.* to llama_stack_api.*
- Configured a local editable install for the development workflow
- Updated the linting and type-checking configuration for both packages

Next Steps

- Publish llama-stack-api to PyPI
- Update external provider dependencies
- Re-enable the external provider module tests

Precursor PRs to this one:

- #4093
- #3954
- #4064

Those PRs moved key pieces _out_ of the API package, limiting the scope of change here.

Relates to #3237

## Test Plan

The package builds successfully and can be imported independently. All pre-commit hooks pass, with the expected exclusions maintained.

---------

Signed-off-by: Charlie Doern <cdoern@redhat.com>
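The migration itself is mechanical: symbols that previously lived under scattered `llama_stack.apis.*` submodules (plus `llama_stack.providers.datatypes` and `llama_stack.schema_utils`) are now re-exported from the top level of `llama_stack_api`. A minimal before/after sketch, using only import paths that appear in the diff below:

```python
# Before: type definitions came from the server package, so an external
# provider had to depend on, and effectively shadow, all of llama-stack.
from llama_stack.apis.inference import OpenAIUserMessageParam
from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
from llama_stack.schema_utils import webmethod

# After: the same symbols come from the standalone package, which carries
# only the lightweight pydantic and jsonschema dependencies.
from llama_stack_api import Api, OpenAIUserMessageParam, ProviderSpec, RemoteProviderSpec, webmethod
```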
Parent: ceb716b9a0
Commit: 840ad75fe9
358 changed files with 2337 additions and 1424 deletions
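For external providers, the payoff is that a provider module can be written against the standalone package alone. A hypothetical sketch, using only names that appear in this PR's diffs (`get_provider_spec`, `ProviderSpec`, `Api`); the spec body is deliberately elided rather than guessed:

```python
# Hypothetical external provider module (illustrative, not part of this PR).
# It imports type definitions from llama-stack-api and never touches the
# llama-stack server package, avoiding the version-shadowing conflict.
from llama_stack_api import Api, ProviderSpec


def get_provider_spec() -> ProviderSpec:
    # A real provider returns a fully populated spec here; the test diffs
    # below construct ProviderSpec with api=Api.inference plus fields such
    # as provider_type and module. See llama_stack_api for the full schema.
    raise NotImplementedError
```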
@@ -6,9 +6,7 @@
 from typing import Protocol

-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.providers.datatypes import Api, ProviderSpec, RemoteProviderSpec
-from llama_stack.schema_utils import webmethod
+from llama_stack_api import LLAMA_STACK_API_V1, Api, ProviderSpec, RemoteProviderSpec, webmethod


 def available_providers() -> list[ProviderSpec]:

@@ -13,8 +13,7 @@ from contextlib import contextmanager
 from io import BytesIO

 import pytest

-from llama_stack.apis.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose


 class BatchHelper:

@@ -9,8 +9,8 @@ from unittest.mock import patch
 import pytest
 import requests
+from llama_stack_api import OpenAIFilePurpose

-from llama_stack.apis.files import OpenAIFilePurpose
 from llama_stack.core.datatypes import User

 purpose = OpenAIFilePurpose.ASSISTANTS

@@ -15,14 +15,14 @@ that enables routing based on provider_data alone.
 from unittest.mock import AsyncMock, patch

 import pytest

-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.inference.inference import (
+from llama_stack_api import (
+    Api,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionUsage,
     OpenAIChoice,
 )

 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.core.telemetry.telemetry import MetricEvent

@@ -9,8 +9,7 @@ import time
 import uuid

 import pytest

-from llama_stack.apis.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     DPOAlignmentConfig,
@@ -18,6 +17,7 @@ from llama_stack.apis.post_training import (
     LoraFinetuningConfig,
     TrainingConfig,
 )
+
 from llama_stack.log import get_logger

 # Configure logging
@@ -10,7 +10,7 @@
},
"response": {
"body": {
-"__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"Llama 4 Maverick model experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9170729, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E - Hugging Face\", \"content\": \"Model Architecture: The Llama 4 models are auto-regressive language models that use a mixture-of-experts (MoE) architecture and incorporate\", \"score\": 0.8021998, \"raw_content\": null}, {\"url\": \"https://www.ibm.com/new/announcements/meta-llama-4-maverick-and-llama-4-scout-now-available-in-watsonx-ai\", \"title\": \"Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx ...\", \"content\": \"# Meta Llama 4 Maverick and Llama 4 Scout now available in watsonx.ai **IBM is excited to announce the addition of Meta\\u2019s latest generation of open models, Llama 4, to** **watsonx.ai****.** Llama 4 Scout and Llama 4 Maverick, the first mixture of experts (MoE) models released by Meta, provide frontier multimodal performance, high speeds, low cost, and industry leading context length. With the introduction of these latest offerings from Meta, IBM now supports a total of 13 Meta models in the expansive library of \\u00a0foundation models available in watsonx.ai. Trained on 40 trillion tokens of data, Llama 4 Scout offers performance rivalling or exceeding that of models with significantly larger active parameter counts while keeping costs and latency low. 
## Llama 4 models on IBM watsonx\", \"score\": 0.78194773, \"raw_content\": null}, {\"url\": \"https://medium.com/@divyanshbhatiajm19/metas-llama-4-family-the-complete-guide-to-scout-maverick-and-behemoth-ai-models-in-2025-21a90c882e8a\", \"title\": \"Meta's Llama 4 Family: The Complete Guide to Scout, Maverick, and ...\", \"content\": \"# Meta\\u2019s Llama 4 Family: The Complete Guide to Scout, Maverick, and Behemoth AI Models in 2025 Feature Llama 4 Scout Llama 4 Maverick Llama 4 Behemoth **Total Parameters** 109B 400B ~2T **Active Parameters** 17B 17B 288B **Expert Count** 16 128 16 **Context Window** 10M tokens 1M tokens Not specified **Hardware Requirements** Single H100 GPU Single H100 DGX host Multiple GPUs **Inference Cost** Not specified $0.19-$0.49 per 1M tokens Not specified **Release Status** Available now Available now In training **Primary Use Cases** Long-context analysis, code processing High-performance multimodal applications Research, STEM reasoning The Llama 4 family represents Meta\\u2019s most significant AI development to date, with each model offering distinct advantages for different use cases:\", \"score\": 0.69672287, \"raw_content\": null}, {\"url\": \"https://www.llama.com/models/llama-4/\", \"title\": \"Unmatched Performance and Efficiency | Llama 4\", \"content\": \"# Llama 4 # Llama 4 Llama 4 Scout Class-leading natively multimodal model that offers superior text and visual intelligence, single H100 GPU efficiency, and a 10M context window for seamless long document analysis. Llama 4 MaverickIndustry-leading natively multimodal model for image and text understanding with groundbreaking intelligence and fast responses at a low cost. We evaluated model performance on a suite of common benchmarks across a wide range of languages, testing for coding, reasoning, knowledge, vision understanding, multilinguality, and long context. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance. 4. Specialized long context evals are not traditionally reported for generalist models, so we share internal runs to showcase llama's frontier performance.\", \"score\": 0.629889, \"raw_content\": null}]}",
"error_message": null,
@@ -10,7 +10,7 @@
},
"response": {
"body": {
-"__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"Llama 4 Maverick model number of experts\", \"top_k\": [{\"url\": \"https://console.groq.com/docs/model/meta-llama/llama-4-maverick-17b-128e-instruct\", \"title\": \"Llama 4 Maverick 17B 128E\", \"content\": \"Llama 4 Maverick is Meta's natively multimodal model that enables text and image understanding. With a 17 billion parameter mixture-of-experts architecture (128 experts), this model offers industry-leading performance for multimodal tasks like natural assistant-like chat, image recognition, and coding tasks. Llama 4 Maverick features an auto-regressive language model that uses a mixture-of-experts (MoE) architecture with 17B activated parameters (400B total) and incorporates early fusion for native multimodality. The model uses 128 experts to efficiently handle both text and image inputs while maintaining high performance across chat, knowledge, and code generation tasks, with a knowledge cutoff of August 2024. * For multimodal applications, this model supports up to 5 image inputs create( model =\\\"meta-llama/llama-4-maverick-17b-128e-instruct\\\", messages =[ { \\\"role\\\": \\\"user\\\", \\\"content\\\": \\\"Explain why fast inference is critical for reasoning models\\\" } ] ) print(completion.\", \"score\": 0.9287263, \"raw_content\": null}, {\"url\": \"https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E\", \"title\": \"meta-llama/Llama-4-Maverick-17B-128E\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Model developer: Meta. Model Architecture: The\", \"score\": 0.9183121, \"raw_content\": null}, {\"url\": \"https://build.nvidia.com/meta/llama-4-maverick-17b-128e-instruct/modelcard\", \"title\": \"llama-4-maverick-17b-128e-instruct Model by Meta\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. Third-Party Community Consideration. This model\", \"score\": 0.91399205, \"raw_content\": null}, {\"url\": \"https://replicate.com/meta/llama-4-maverick-instruct\", \"title\": \"meta/llama-4-maverick-instruct | Run with an API on ...\", \"content\": \"... model with 16 experts, and Llama 4 Maverick, a 17 billion parameter model with 128 experts. All services are online \\u00b7 Home \\u00b7 About \\u00b7 Changelog\", \"score\": 0.9073207, \"raw_content\": null}, {\"url\": \"https://openrouter.ai/meta-llama/llama-4-maverick\", \"title\": \"Llama 4 Maverick - API, Providers, Stats\", \"content\": \"# Meta: Llama 4 Maverick ### meta-llama/llama-4-maverick Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput. Llama 4 Maverick - API, Providers, Stats | OpenRouter ## Providers for Llama 4 Maverick ## Performance for Llama 4 Maverick ## Apps using Llama 4 Maverick ## Recent activity on Llama 4 Maverick ## Uptime stats for Llama 4 Maverick ## Sample code and API for Llama 4 Maverick\", \"score\": 0.8958969, \"raw_content\": null}]}",
"error_message": null,
@@ -10,7 +10,7 @@
},
"response": {
"body": {
-"__type__": "llama_stack.apis.tools.tools.ToolInvocationResult",
+"__type__": "llama_stack_api.tools.ToolInvocationResult",
"__data__": {
"content": "{\"query\": \"latest version of Python\", \"top_k\": [{\"url\": \"https://www.liquidweb.com/blog/latest-python-version/\", \"title\": \"The latest Python version: Python 3.14 - Liquid Web\", \"content\": \"The latest major version, Python 3.14 was officially released on October 7, 2025. Let's explore the key features of Python's current version, how to download\", \"score\": 0.890761, \"raw_content\": null}, {\"url\": \"https://docs.python.org/3/whatsnew/3.14.html\", \"title\": \"What's new in Python 3.14 \\u2014 Python 3.14.0 documentation\", \"content\": \"Python 3.14 is the latest stable release of the Python programming language, with a mix of changes to the language, the implementation, and the standard\", \"score\": 0.8124067, \"raw_content\": null}, {\"url\": \"https://devguide.python.org/versions/\", \"title\": \"Status of Python versions - Python Developer's Guide\", \"content\": \"The main branch is currently the future Python 3.15, and is the only branch that accepts new features. The latest release for each Python version can be found\", \"score\": 0.80089486, \"raw_content\": null}, {\"url\": \"https://www.python.org/doc/versions/\", \"title\": \"Python documentation by version\", \"content\": \"Python 3.12.4, documentation released on 6 June 2024. Python 3.12.3, documentation released on 9 April 2024. Python 3.12.2, documentation released on 6 February\", \"score\": 0.74563974, \"raw_content\": null}, {\"url\": \"https://www.python.org/downloads/\", \"title\": \"Download Python | Python.org\", \"content\": \"Active Python Releases \\u00b7 3.15 pre-release 2026-10-07 (planned) 2031-10 PEP 790 \\u00b7 3.14 bugfix 2025-10-07 2030-10 PEP 745 \\u00b7 3.13 bugfix 2024-10-07 2029-10 PEP 719\", \"score\": 0.6551821, \"raw_content\": null}]}",
"error_message": null,
@@ -12,8 +12,8 @@ import warnings
 from collections.abc import Generator

 import pytest
+from llama_stack_api import ViolationLevel

-from llama_stack.apis.safety import ViolationLevel
 from llama_stack.models.llama.sku_types import CoreModelId

 # Llama Guard models available for text and vision shields

@@ -7,8 +7,7 @@ import base64
 import mimetypes

 import pytest

-from llama_stack.apis.safety import ViolationLevel
+from llama_stack_api import ViolationLevel

 CODE_SCANNER_ENABLED_PROVIDERS = {"ollama", "together", "fireworks"}

@@ -9,8 +9,7 @@ import mimetypes
 import os

 import pytest

-from llama_stack.apis.safety import ViolationLevel
+from llama_stack_api import ViolationLevel

 VISION_SHIELD_ENABLED_PROVIDERS = {"together"}

@@ -7,8 +7,8 @@
 import re

 import pytest
+from llama_stack_api import ToolGroupNotFoundError

-from llama_stack.apis.common.errors import ToolGroupNotFoundError
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server

@@ -8,11 +8,10 @@ import time
 from io import BytesIO

 import pytest
+from llama_stack_api import Chunk, ExpiresAfter
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError

-from llama_stack.apis.files import ExpiresAfter
-from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger
@@ -646,7 +645,7 @@ def test_openai_vector_store_attach_file(
 ):
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     compat_client = compat_client_with_empty_stores
@@ -710,7 +709,7 @@ def test_openai_vector_store_attach_files_on_creation(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create some files and attach them to the vector store
     valid_file_ids = []
@@ -775,7 +774,7 @@ def test_openai_vector_store_list_files(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -867,7 +866,7 @@ def test_openai_vector_store_retrieve_file_contents(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -928,7 +927,7 @@ def test_openai_vector_store_delete_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -994,7 +993,7 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1046,7 +1045,7 @@ def test_openai_vector_store_update_file(
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)

     compat_client = compat_client_with_empty_stores
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     # Create a vector store
     vector_store = compat_client.vector_stores.create(
@@ -1103,7 +1102,7 @@ def test_create_vector_store_files_duplicate_vector_store_name(
     This test confirms that client.vector_stores.create() creates a unique ID
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    from llama_stack.apis.files import ExpiresAfter
+    from llama_stack_api import ExpiresAfter

     compat_client = compat_client_with_empty_stores
@@ -5,8 +5,7 @@
 # the root directory of this source tree.

 import pytest

-from llama_stack.apis.vector_io import Chunk
+from llama_stack_api import Chunk

 from ..conftest import vector_provider_wrapper

@@ -5,11 +5,7 @@
 # the root directory of this source tree.


-from llama_stack.apis.conversations.conversations import (
-    Conversation,
-    ConversationItem,
-    ConversationItemList,
-)
+from llama_stack_api import Conversation, ConversationItem, ConversationItemList


 def test_conversation_model_defaults():

@@ -8,14 +8,11 @@ import tempfile
 from pathlib import Path

 import pytest
+from llama_stack_api import OpenAIResponseInputMessageContentText, OpenAIResponseMessage
 from openai.types.conversations.conversation import Conversation as OpenAIConversation
 from openai.types.conversations.conversation_item import ConversationItem as OpenAIConversationItem
 from pydantic import TypeAdapter

-from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInputMessageContentText,
-    OpenAIResponseMessage,
-)
 from llama_stack.core.conversations.conversations import (
     ConversationServiceConfig,
     ConversationServiceImpl,

@@ -6,8 +6,8 @@
 from unittest.mock import AsyncMock

-from llama_stack.apis.safety.safety import ModerationObject, ModerationObjectResults
-from llama_stack.apis.shields import ListShieldsResponse, Shield
+from llama_stack_api import ListShieldsResponse, ModerationObject, ModerationObjectResults, Shield

 from llama_stack.core.datatypes import SafetyConfig
 from llama_stack.core.routers.safety import SafetyRouter

@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock, Mock

 import pytest
+from llama_stack_api import OpenAICreateVectorStoreRequestWithExtraBody

-from llama_stack.apis.vector_io import OpenAICreateVectorStoreRequestWithExtraBody
 from llama_stack.core.routers.vector_io import VectorIORouter

@@ -9,12 +9,10 @@
 from unittest.mock import AsyncMock

 import pytest
+from llama_stack_api import Api, ListModelsResponse, ListShieldsResponse, Model, ModelType, Shield

-from llama_stack.apis.models import ListModelsResponse, Model, ModelType
-from llama_stack.apis.shields import ListShieldsResponse, Shield
 from llama_stack.core.datatypes import QualifiedModel, SafetyConfig, StackRunConfig, StorageConfig, VectorStoresConfig
 from llama_stack.core.stack import validate_safety_config, validate_vector_stores_config
-from llama_stack.providers.datatypes import Api


 class TestVectorStoresValidation:
@@ -9,15 +9,22 @@
 from unittest.mock import AsyncMock

 import pytest
+from llama_stack_api import (
+    URL,
+    Api,
+    Dataset,
+    DatasetPurpose,
+    ListToolDefsResponse,
+    Model,
+    ModelNotFoundError,
+    ModelType,
+    NumberType,
+    Shield,
+    ToolDef,
+    ToolGroup,
+    URIDataSource,
+)

-from llama_stack.apis.common.content_types import URL
-from llama_stack.apis.common.errors import ModelNotFoundError
-from llama_stack.apis.common.type_system import NumberType
-from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import Model, ModelType
-from llama_stack.apis.shields.shields import Shield
-from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup
 from llama_stack.core.datatypes import RegistryEntrySource
 from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable
 from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable

@@ -9,10 +9,9 @@ from pathlib import Path
 from unittest.mock import patch

 import pytest
-from openai import AsyncOpenAI

 # Import the real Pydantic response types instead of using Mocks
-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
@@ -20,6 +19,8 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
 )
+from openai import AsyncOpenAI

 from llama_stack.testing.api_recorder import (
     APIRecordingMode,
     ResponseStorage,
@@ -9,6 +9,7 @@ from unittest.mock import patch
 import pytest
 import yaml
+from llama_stack_api import ProviderSpec
 from pydantic import BaseModel, Field, ValidationError

 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
@@ -22,7 +23,6 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.datatypes import ProviderSpec


 class SampleConfig(BaseModel):
@@ -312,7 +312,7 @@ pip_packages:
         """Test loading an external provider from a module (success path)."""
         from types import SimpleNamespace

-        from llama_stack.providers.datatypes import Api, ProviderSpec
+        from llama_stack_api import Api, ProviderSpec

         # Simulate a provider module with get_provider_spec
         fake_spec = ProviderSpec(
@@ -395,8 +395,9 @@ pip_packages:

     def test_external_provider_from_module_building(self, mock_providers):
         """Test loading an external provider from a module during build (building=True, partial spec)."""
+        from llama_stack_api import Api
+
         from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec
-        from llama_stack.providers.datatypes import Api

         # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec
         build_config = BuildConfig(
@@ -456,8 +457,9 @@ class TestGetExternalProvidersFromModule:
         """Test provider with module containing version spec (e.g., package==1.0.0)."""
         from types import SimpleNamespace

+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec

         fake_spec = ProviderSpec(
             api=Api.inference,
@@ -593,8 +595,9 @@ class TestGetExternalProvidersFromModule:
         """Test when get_provider_spec returns a list of specs."""
         from types import SimpleNamespace

+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -641,8 +644,9 @@ class TestGetExternalProvidersFromModule:
         """Test that list return filters specs by provider_type."""
         from types import SimpleNamespace

+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec

         spec1 = ProviderSpec(
             api=Api.inference,
@@ -689,8 +693,9 @@ class TestGetExternalProvidersFromModule:
         """Test that list return adds multiple different provider_types when config requests them."""
         from types import SimpleNamespace

+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec

         # Module returns both inline and remote variants
         spec1 = ProviderSpec(
@@ -828,8 +833,9 @@ class TestGetExternalProvidersFromModule:
         """Test multiple APIs with providers."""
         from types import SimpleNamespace

+        from llama_stack_api import ProviderSpec
+
         from llama_stack.core.distribution import get_external_providers_from_module
-        from llama_stack.providers.datatypes import ProviderSpec

         inference_spec = ProviderSpec(
             api=Api.inference,
@@ -6,10 +6,8 @@
 import pytest
+from llama_stack_api import OpenAIFilePurpose, Order, ResourceNotFoundError

-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.common.responses import Order
-from llama_stack.apis.files import OpenAIFilePurpose
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.core.storage.datatypes import SqliteSqlStoreConfig, SqlStoreReference
 from llama_stack.providers.inline.files.localfs import (

@@ -58,9 +58,7 @@ import json
 from unittest.mock import AsyncMock, MagicMock

 import pytest

-from llama_stack.apis.batches import BatchObject
-from llama_stack.apis.common.errors import ConflictError, ResourceNotFoundError
+from llama_stack_api import BatchObject, ConflictError, ResourceNotFoundError


 class TestReferenceBatchesImpl:

@@ -43,8 +43,7 @@ Key Behaviors Tested:
 import asyncio

 import pytest

-from llama_stack.apis.common.errors import ConflictError
+from llama_stack_api import ConflictError


 class TestReferenceBatchesIdempotency:

@@ -8,9 +8,7 @@ from unittest.mock import patch
 import pytest
 from botocore.exceptions import ClientError

-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.files import OpenAIFilePurpose
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError


 class TestS3FilesImpl:
@@ -228,7 +226,7 @@ class TestS3FilesImpl:

         mock_now.return_value = 0

-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter

         sample_text_file.filename = "test_expired_file"
         uploaded = await s3_provider.openai_upload_file(
@@ -260,7 +258,7 @@ class TestS3FilesImpl:

     async def test_unsupported_expires_after_anchor(self, s3_provider, sample_text_file):
         """Unsupported anchor value should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter

         sample_text_file.filename = "test_unsupported_expires_after_anchor"
@@ -273,7 +271,7 @@ class TestS3FilesImpl:

     async def test_nonint_expires_after_seconds(self, s3_provider, sample_text_file):
         """Non-integer seconds in expires_after should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter

         sample_text_file.filename = "test_nonint_expires_after_seconds"
@@ -286,7 +284,7 @@ class TestS3FilesImpl:

     async def test_expires_after_seconds_out_of_bounds(self, s3_provider, sample_text_file):
         """Seconds outside allowed range should raise ValueError."""
-        from llama_stack.apis.files import ExpiresAfter
+        from llama_stack_api import ExpiresAfter

         with pytest.raises(ValueError, match="greater than or equal to 3600"):
             await s3_provider.openai_upload_file(
@@ -7,9 +7,8 @@
 from unittest.mock import patch

 import pytest
+from llama_stack_api import OpenAIFilePurpose, ResourceNotFoundError

-from llama_stack.apis.common.errors import ResourceNotFoundError
-from llama_stack.apis.files import OpenAIFilePurpose
 from llama_stack.core.datatypes import User
 from llama_stack.providers.remote.files.s3.files import S3FilesImpl

@@ -8,9 +8,9 @@ from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock

 import pytest
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
 from openai import AuthenticationError

-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig

@@ -9,8 +9,9 @@ import time
 from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch

 import pytest

-from llama_stack.apis.inference import (
+from llama_stack_api import (
+    HealthStatus,
+    Model,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionRequestWithExtraBody,
@@ -20,10 +21,9 @@ from llama_stack.apis.inference import (
     OpenAICompletionRequestWithExtraBody,
     ToolChoice,
 )
-from llama_stack.apis.models import Model

 from llama_stack.core.routers.inference import InferenceRouter
 from llama_stack.core.routing_tables.models import ModelsRoutingTable
-from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
 from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter

@@ -7,8 +7,8 @@
 from unittest.mock import AsyncMock

 import pytest
+from llama_stack_api import ToolDef

-from llama_stack.apis.tools import ToolDef
 from llama_stack.providers.inline.agents.meta_reference.responses.streaming import (
     convert_tooldef_to_chat_tool,
 )

@@ -8,9 +8,8 @@ import os
 from unittest.mock import patch

 import pytest
+from llama_stack_api import Dataset, DatasetPurpose, ResourceType, URIDataSource

-from llama_stack.apis.datasets import Dataset, DatasetPurpose, URIDataSource
-from llama_stack.apis.resource import ResourceType
 from llama_stack.providers.remote.datasetio.nvidia.config import NvidiaDatasetIOConfig
 from llama_stack.providers.remote.datasetio.nvidia.datasetio import NvidiaDatasetIOAdapter
@@ -8,12 +8,18 @@ import os
 from unittest.mock import MagicMock, patch

 import pytest
+from llama_stack_api import (
+    Benchmark,
+    BenchmarkConfig,
+    EvaluateResponse,
+    Job,
+    JobStatus,
+    ModelCandidate,
+    ResourceType,
+    SamplingParams,
+    TopPSamplingStrategy,
+)

-from llama_stack.apis.benchmarks import Benchmark
-from llama_stack.apis.common.job_types import Job, JobStatus
-from llama_stack.apis.eval.eval import BenchmarkConfig, EvaluateResponse, ModelCandidate, SamplingParams
-from llama_stack.apis.inference.inference import TopPSamplingStrategy
-from llama_stack.apis.resource import ResourceType
 from llama_stack.models.llama.sku_types import CoreModelId
 from llama_stack.providers.remote.eval.nvidia.config import NVIDIAEvalConfig
 from llama_stack.providers.remote.eval.nvidia.eval import NVIDIAEvalImpl

@@ -9,8 +9,7 @@ import warnings
 from unittest.mock import patch

 import pytest

-from llama_stack.apis.post_training.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     EfficiencyConfig,
@@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import (
     OptimizerType,
     TrainingConfig,
 )
+
 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
     NvidiaPostTrainingAdapter,

@@ -8,8 +8,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import aiohttp
 import pytest
+from llama_stack_api import ModelType

-from llama_stack.apis.models import ModelType
 from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig
 from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -9,14 +9,15 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIUserMessageParam,
+    ResourceType,
+    RunShieldResponse,
+    Shield,
+    ViolationLevel,
 )
-from llama_stack.apis.resource import ResourceType
-from llama_stack.apis.safety import RunShieldResponse, ViolationLevel
-from llama_stack.apis.shields import Shield

 from llama_stack.providers.remote.safety.nvidia.config import NVIDIASafetyConfig
 from llama_stack.providers.remote.safety.nvidia.nvidia import NVIDIASafetyAdapter

@@ -9,8 +9,7 @@ import warnings
 from unittest.mock import patch

 import pytest

-from llama_stack.apis.post_training.post_training import (
+from llama_stack_api import (
     DataConfig,
     DatasetFormat,
     LoraFinetuningConfig,
@@ -19,6 +18,7 @@ from llama_stack.apis.post_training.post_training import (
     QATFinetuningConfig,
     TrainingConfig,
 )
+
 from llama_stack.core.library_client import convert_pydantic_to_json_value
 from llama_stack.providers.remote.post_training.nvidia.post_training import (
     ListNvidiaPostTrainingJobs,
@@ -7,7 +7,8 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, PropertyMock, patch

-from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody
+from llama_stack_api import OpenAIChatCompletionRequestWithExtraBody
+
 from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig

@@ -10,10 +10,9 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch

 import pytest
+from llama_stack_api import Model, ModelType, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
 from pydantic import BaseModel, Field

-from llama_stack.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
-from llama_stack.apis.models import ModelType
 from llama_stack.core.request_headers import request_provider_data_context
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -4,10 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import (
-    OpenAIAssistantMessageParam,
-    OpenAIUserMessageParam,
-)
+from llama_stack_api import OpenAIAssistantMessageParam, OpenAIUserMessageParam

 from llama_stack.models.llama.datatypes import RawTextItem
 from llama_stack.providers.utils.inference.prompt_adapter import (
     convert_openai_message_to_raw_message,

@@ -7,9 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
+from llama_stack_api import URL, RAGDocument, TextContentItem

-from llama_stack.apis.common.content_types import URL, TextContentItem
-from llama_stack.apis.tools import RAGDocument
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc

@@ -34,8 +34,8 @@
 #

 import pytest
+from llama_stack_api import Model

-from llama_stack.apis.models import Model
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper, ProviderModelEntry

@@ -9,9 +9,8 @@ from unittest.mock import AsyncMock, MagicMock, patch
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, VectorStore

-from llama_stack.apis.vector_io import Chunk, ChunkMetadata, QueryChunksResponse
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter

@@ -9,11 +9,8 @@ from unittest.mock import MagicMock, patch
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, Files, HealthStatus, QueryChunksResponse, VectorStore

-from llama_stack.apis.files import Files
-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
-from llama_stack.apis.vector_stores import VectorStore
-from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.faiss.faiss import (
     FaissIndex,

@@ -8,8 +8,8 @@ import asyncio
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, QueryChunksResponse

-from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
     SQLiteVecIndex,
     SQLiteVecVectorIOAdapter,
@@ -10,17 +10,17 @@ from unittest.mock import AsyncMock, patch
 import numpy as np
 import pytest

-from llama_stack.apis.common.errors import VectorStoreNotFoundError
-from llama_stack.apis.vector_io import (
+from llama_stack_api import (
     Chunk,
     OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
     OpenAICreateVectorStoreRequestWithExtraBody,
     QueryChunksResponse,
+    VectorStore,
     VectorStoreChunkingStrategyAuto,
     VectorStoreFileObject,
+    VectorStoreNotFoundError,
 )
-from llama_stack.apis.vector_stores import VectorStore

 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import VECTOR_DBS_PREFIX

 # This test is a unit test for the inline VectorIO providers. This should only contain
@@ -222,7 +222,7 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):

 async def test_insert_chunks_with_missing_document_id(vector_io_adapter):
     """Ensure no KeyError when document_id is missing or in different places."""
-    from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+    from llama_stack_api import Chunk, ChunkMetadata

     fake_index = AsyncMock()
     vector_io_adapter.cache["db1"] = fake_index
@@ -255,7 +255,7 @@ async def test_insert_chunks_with_missing_document_id(vector_io_adapter):

 async def test_document_id_with_invalid_type_raises_error():
     """Ensure TypeError is raised when document_id is not a string."""
-    from llama_stack.apis.vector_io import Chunk
+    from llama_stack_api import Chunk

     # Integer document_id should raise TypeError
     from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

@@ -4,7 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack_api import Chunk, ChunkMetadata
+
 from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

 # This test is a unit test for the chunk_utils.py helpers. This should only contain

@@ -7,13 +7,8 @@
 from unittest.mock import AsyncMock, MagicMock

 import pytest
+from llama_stack_api import Chunk, ChunkMetadata, QueryChunksResponse, RAGQueryConfig

-from llama_stack.apis.tools.rag_tool import RAGQueryConfig
-from llama_stack.apis.vector_io import (
-    Chunk,
-    ChunkMetadata,
-    QueryChunksResponse,
-)
 from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl

@@ -12,13 +12,8 @@ from unittest.mock import AsyncMock, MagicMock
 import numpy as np
 import pytest
+from llama_stack_api import Chunk, OpenAIEmbeddingData, OpenAIEmbeddingsRequestWithExtraBody, RAGDocument

-from llama_stack.apis.inference.inference import (
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsRequestWithExtraBody,
-)
-from llama_stack.apis.tools import RAGDocument
-from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
     URL,
     VectorStoreWithIndex,
@@ -6,9 +6,8 @@
 import pytest
+from llama_stack_api import Model, VectorStore

-from llama_stack.apis.inference import Model
-from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.core.datatypes import VectorStoreWithOwner
 from llama_stack.core.storage.datatypes import KVStoreReference, SqliteKVStoreConfig
 from llama_stack.core.store.registry import (
@@ -304,7 +303,7 @@ async def test_double_registration_different_objects(disk_dist_registry):

 async def test_double_registration_with_cache(cached_disk_dist_registry):
     """Test double registration behavior with caching enabled."""
-    from llama_stack.apis.models import ModelType
+    from llama_stack_api import ModelType

     from llama_stack.core.datatypes import ModelWithOwner

     model1 = ModelWithOwner(

@@ -5,7 +5,8 @@
 # the root directory of this source tree.


-from llama_stack.apis.models import ModelType
+from llama_stack_api import ModelType
+
 from llama_stack.core.datatypes import ModelWithOwner, User
 from llama_stack.core.store.registry import CachedDiskDistributionRegistry

@@ -8,10 +8,9 @@ from unittest.mock import MagicMock, Mock, patch
 import pytest
 import yaml
+from llama_stack_api import Api, ModelType
 from pydantic import TypeAdapter, ValidationError

-from llama_stack.apis.datatypes import Api
-from llama_stack.apis.models import ModelType
 from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
 from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User
 from llama_stack.core.routing_tables.models import ModelsRoutingTable

@@ -144,7 +144,7 @@ def middleware_with_mocks(mock_auth_endpoint):
     middleware = AuthenticationMiddleware(mock_app, auth_config, {})

     # Mock the route_impls to simulate finding routes with required scopes
-    from llama_stack.schema_utils import WebMethod
+    from llama_stack_api import WebMethod

     routes = {
         ("POST", "/test/scoped"): WebMethod(route="/test/scoped", method="POST", required_scope="test.read"),

@@ -9,9 +9,9 @@ import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock

+from llama_stack_api import Inference, InlineProviderSpec, ProviderSpec
 from pydantic import BaseModel, Field

-from llama_stack.apis.inference import Inference
 from llama_stack.core.datatypes import Api, Provider, StackRunConfig
 from llama_stack.core.resolver import resolve_impls
 from llama_stack.core.routers.inference import InferenceRouter
@@ -25,7 +25,6 @@ from llama_stack.core.storage.datatypes import (
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec
 from llama_stack.providers.utils.kvstore import register_kvstore_backends
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends

@@ -9,8 +9,8 @@ import logging # allow-direct-logging
 from unittest.mock import AsyncMock, MagicMock

 import pytest
+from llama_stack_api import PaginatedResponse

-from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator

@@ -9,9 +9,9 @@ Unit tests for JSON Schema-based tool definitions.
 Tests the new input_schema and output_schema fields.
 """

+from llama_stack_api import ToolDef
 from pydantic import ValidationError

-from llama_stack.apis.tools import ToolDef
 from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
@@ -7,14 +7,14 @@
 import time

 import pytest

-from llama_stack.apis.inference import (
+from llama_stack_api import (
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChoice,
     OpenAIUserMessageParam,
     Order,
 )

 from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends

@@ -9,13 +9,8 @@ from tempfile import TemporaryDirectory
 from uuid import uuid4

 import pytest
+from llama_stack_api import OpenAIMessageParam, OpenAIResponseInput, OpenAIResponseObject, OpenAIUserMessageParam, Order

-from llama_stack.apis.agents import Order
-from llama_stack.apis.agents.openai_responses import (
-    OpenAIResponseInput,
-    OpenAIResponseObject,
-)
-from llama_stack.apis.inference import OpenAIMessageParam, OpenAIUserMessageParam
 from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
@@ -46,7 +41,7 @@ def create_test_response_object(

 def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInput:
     """Helper to create a test response input."""
-    from llama_stack.apis.agents.openai_responses import OpenAIResponseMessage
+    from llama_stack_api import OpenAIResponseMessage

     return OpenAIResponseMessage(
         id=input_id,