From 2e4eedce14132e5ed125766404fa05f973ab84a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 11 Jul 2025 16:25:33 +0200 Subject: [PATCH 1/8] fix: container build on podman (#2723) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? COPY with --chmod does not work with Buildah, see https://github.com/containers/buildah/issues/4614, although Docker arguably implements it. Anyway, the flag is not even needed, since later we do: ``` RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache ``` and providers.d will get the right modes. ## Test Plan Build with CONTAINER_BINARY=podman and verify the build succeeds. Signed-off-by: Sébastien Han --- llama_stack/distribution/build_container.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index d9a918fb5..6e794b36f 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -96,7 +96,7 @@ FROM $container_base WORKDIR /app # We install the Python 3.12 dev headers and build tools so that any -# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully. +# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully. RUN dnf -y update && dnf install -y iputils git net-tools wget \ vim-minimal python3.12 python3.12-pip python3.12-wheel \ @@ -169,7 +169,7 @@ if [ -n "$run_config" ]; then echo "Copying external providers directory: $external_providers_dir" cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" add_to_container << EOF -COPY --chmod=g+w providers.d /.llama/providers.d +COPY providers.d /.llama/providers.d EOF fi From 2ebc172f339916d450139325c30808f675f022fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 11 Jul 2025 16:25:51 +0200 Subject: [PATCH 2/8] fix: pin opentelemetry version (#2722) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do?
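Pin `opentelemetry-sdk` and `opentelemetry-exporter-otlp-proto-http` to `>=1.30.0`. As a quick sanity check that a build picked up a good version, the import that old releases are missing can be exercised directly (a minimal sketch, not part of this change; the endpoint is an assumed local OTLP collector on the default port):
```
# hypothetical check script -- not part of this PR
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter

# Constructing the exporter is enough to prove the module resolves;
# the endpoint below is an assumed local collector, nothing is sent here.
exporter = OTLPMetricExporter(endpoint="http://localhost:4318/v1/metrics")
print(type(exporter).__name__)
```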
Otherwise we can get old versions like 1.11 and experience this error: ``` ModuleNotFoundError: No module named 'opentelemetry.exporter.otlp.proto.http.metric_exporter' ``` Signed-off-by: Sébastien Han --- pyproject.toml | 4 ++-- uv.lock | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d84a823a3..89ae4bc23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,8 +42,8 @@ dependencies = [ "h11>=0.16.0", "python-multipart>=0.0.20", # For fastapi Form "uvicorn>=0.34.0", # server - "opentelemetry-sdk", # server - "opentelemetry-exporter-otlp-proto-http", # server + "opentelemetry-sdk>=1.30.0", # server + "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store ] diff --git a/uv.lock b/uv.lock index e77fb89f5..8374fe38a 100644 --- a/uv.lock +++ b/uv.lock @@ -1365,8 +1365,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.14" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.14" }, { name = "openai", specifier = ">=1.66" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, From 30b2e6a495361faa6e52e6255780a69e92254f58 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Fri, 11 Jul 2025 16:00:24 -0400 Subject: [PATCH 3/8] chore: default to pytest asyncio-mode=auto (#2730) # What does this PR do? Previously, developers who ran `./scripts/unit-tests.sh` would get `asyncio-mode=auto`, which meant `@pytest.mark.asyncio` and `@pytest_asyncio.fixture` were redundant. Developers who ran `pytest` directly would get pytest's default (strict mode) and would run into errors, leading them to add `@pytest.mark.asyncio` / `@pytest_asyncio.fixture` to their code.
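For illustration, under `asyncio_mode=auto` a test file like this (a hypothetical sketch, not part of this diff) needs no plugin-specific decorators at all:
```
import asyncio

import pytest


# In auto mode, pytest-asyncio treats an async fixture declared with the
# stock @pytest.fixture decorator as an asyncio fixture, so
# @pytest_asyncio.fixture is redundant.
@pytest.fixture
async def greeting():
    await asyncio.sleep(0)
    return "hello"


# Likewise, this coroutine is collected and run without @pytest.mark.asyncio;
# under strict mode it would be skipped with an "async def functions are not
# natively supported" warning unless explicitly marked.
async def test_greeting(greeting):
    assert greeting == "hello"
```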
With this change: - `asyncio_mode=auto` is included in `pyproject.toml`, making behavior consistent for all invocations of pytest - all redundant `@pytest_asyncio.fixture` and `@pytest.mark.asyncio` markers are removed - for good measure, `pytest>=8.4` and `pytest-asyncio>=1.0` are now required ## Test Plan - `./scripts/unit-tests.sh` - `uv run pytest tests/unit` --- pyproject.toml | 7 ++++-- scripts/unit-tests.sh | 2 +- tests/integration/agents/test_persistence.py | 2 -- tests/integration/inspect/test_inspect.py | 3 --- tests/integration/providers/test_providers.py | 2 -- .../sqlstore/test_authorized_sqlstore.py | 2 -- .../routers/test_routing_tables.py | 8 ------- tests/unit/distribution/test_context.py | 3 --- tests/unit/files/test_files.py | 22 +------------------ tests/unit/fixtures.py | 8 +++---- .../agent/test_meta_reference_agent.py | 9 +------- .../meta_reference/test_openai_responses.py | 15 ------------- .../agents/test_persistence_access_control.py | 7 +----- .../providers/inference/test_remote_vllm.py | 16 +------------- .../utils/inference/test_openai_compat.py | 6 ----- .../utils/memory/test_vector_store.py | 7 ------ .../providers/utils/test_model_registry.py | 10 --------- tests/unit/providers/utils/test_scheduler.py | 3 --- tests/unit/providers/vector_io/test_faiss.py | 8 ++----- tests/unit/providers/vector_io/test_qdrant.py | 5 +---- .../providers/vector_io/test_sqlite_vec.py | 18 +-------------- .../test_vector_io_openai_vector_stores.py | 19 ---------------- tests/unit/rag/test_rag_query.py | 2 -- tests/unit/rag/test_vector_store.py | 7 ------ tests/unit/registry/test_registry.py | 8 ------- tests/unit/registry/test_registry_acl.py | 5 ----- tests/unit/server/test_access_control.py | 11 ++-------- tests/unit/server/test_auth.py | 1 - tests/unit/server/test_resolver.py | 2 -- tests/unit/server/test_sse.py | 7 ------ .../utils/inference/test_inference_store.py | 5 ----- .../utils/responses/test_responses_store.py | 8 ------- tests/unit/utils/sqlstore/test_sqlstore.py | 8 ------- tests/unit/utils/test_authorized_sqlstore.py | 5 ----- uv.lock | 17 +++++++------- 35 files changed, 29 insertions(+), 239 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 89ae4bc23..f4115d028 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,9 +58,9 @@ ui = [ [dependency-groups] dev = [ - "pytest", + "pytest>=8.4", "pytest-timeout", - "pytest-asyncio", + "pytest-asyncio>=1.0", "pytest-cov", "pytest-html", "pytest-json-report", @@ -339,3 +339,6 @@ warn_required_dynamic_aliases = true [tool.ruff.lint.pep8-naming] classmethod-decorators = ["classmethod", "pydantic.field_validator"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh index 1fc3ff7fc..68d6458fc 100755 --- a/scripts/unit-tests.sh +++ b/scripts/unit-tests.sh @@ -16,4 +16,4 @@ if [ $FOUND_PYTHON -ne 0 ]; then uv python install "$PYTHON_VERSION" fi -uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@ +uv run --python "$PYTHON_VERSION" --with-editable . 
--group unit pytest -s -v tests/unit/ $@ diff --git a/tests/integration/agents/test_persistence.py b/tests/integration/agents/test_persistence.py index ef35c97a5..49d9d42d0 100644 --- a/tests/integration/agents/test_persistence.py +++ b/tests/integration/agents/test_persistence.py @@ -44,7 +44,6 @@ def common_params(inference_model): ) -@pytest.mark.asyncio @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world") async def test_delete_agents_and_sessions(self, agents_stack, common_params): agents_impl = agents_stack.impls[Api.agents] @@ -73,7 +72,6 @@ async def test_delete_agents_and_sessions(self, agents_stack, common_params): assert agent_response is None -@pytest.mark.asyncio @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world") async def test_get_agent_turns_and_steps(self, agents_stack, sample_messages, common_params): agents_impl = agents_stack.impls[Api.agents] diff --git a/tests/integration/inspect/test_inspect.py b/tests/integration/inspect/test_inspect.py index da704178d..1597a319b 100644 --- a/tests/integration/inspect/test_inspect.py +++ b/tests/integration/inspect/test_inspect.py @@ -4,20 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest from llama_stack_client import LlamaStackClient from llama_stack import LlamaStackAsLibraryClient class TestInspect: - @pytest.mark.asyncio def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): health = llama_stack_client.inspect.health() assert health is not None assert health.status == "OK" - @pytest.mark.asyncio def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): version = llama_stack_client.inspect.version() assert version is not None diff --git a/tests/integration/providers/test_providers.py b/tests/integration/providers/test_providers.py index 8b153411c..fc65e2a10 100644 --- a/tests/integration/providers/test_providers.py +++ b/tests/integration/providers/test_providers.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import pytest from llama_stack_client import LlamaStackClient from llama_stack import LlamaStackAsLibraryClient class TestProviders: - @pytest.mark.asyncio def test_providers(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): provider_list = llama_stack_client.providers.list() assert provider_list is not None diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index bf6077532..c32d6cd17 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -88,7 +88,6 @@ async def cleanup_records(sql_store, table_name, record_ids): pass -@pytest.mark.asyncio @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_store_attributes(mock_get_authenticated_user, authorized_store, request): @@ -183,7 +182,6 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz await cleanup_records(authorized_store.sql_store, table_name, ["1", "2", "3", "4", "5", "6"]) -@pytest.mark.asyncio @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_user_ownership_policy(mock_get_authenticated_user, authorized_store, request): diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 0eeb68167..3ba042bd9 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -8,8 +8,6 @@ from unittest.mock import AsyncMock -import pytest - from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api @@ -119,7 +117,6 @@ class ToolGroupsImpl(Impl): ) -@pytest.mark.asyncio async def test_models_routing_table(cached_disk_dist_registry): table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -161,7 +158,6 @@ async def test_models_routing_table(cached_disk_dist_registry): assert len(openai_models.data) == 0 -@pytest.mark.asyncio async def test_shields_routing_table(cached_disk_dist_registry): table = ShieldsRoutingTable({"test_provider": SafetyImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -177,7 +173,6 @@ async def test_shields_routing_table(cached_disk_dist_registry): assert "test-shield-2" in shield_ids -@pytest.mark.asyncio async def test_vectordbs_routing_table(cached_disk_dist_registry): table = VectorDBsRoutingTable({"test_provider": VectorDBImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -233,7 +228,6 @@ async def test_datasets_routing_table(cached_disk_dist_registry): assert len(datasets.data) == 0 -@pytest.mark.asyncio async def test_scoring_functions_routing_table(cached_disk_dist_registry): table = ScoringFunctionsRoutingTable({"test_provider": ScoringFunctionsImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -259,7 +253,6 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry): assert "test-scoring-fn-2" in scoring_fn_ids -@pytest.mark.asyncio async def test_benchmarks_routing_table(cached_disk_dist_registry): table = 
BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -277,7 +270,6 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry): assert "test-benchmark" in benchmark_ids -@pytest.mark.asyncio async def test_tool_groups_routing_table(cached_disk_dist_registry): table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {}) await table.initialize() diff --git a/tests/unit/distribution/test_context.py b/tests/unit/distribution/test_context.py index 84944bfe8..7914be51d 100644 --- a/tests/unit/distribution/test_context.py +++ b/tests/unit/distribution/test_context.py @@ -13,7 +13,6 @@ import pytest from llama_stack.distribution.utils.context import preserve_contexts_async_generator -@pytest.mark.asyncio async def test_preserve_contexts_with_exception(): # Create context variable context_var = ContextVar("exception_var", default="initial") @@ -41,7 +40,6 @@ async def test_preserve_contexts_with_exception(): context_var.reset(token) -@pytest.mark.asyncio async def test_preserve_contexts_empty_generator(): # Create context variable context_var = ContextVar("empty_var", default="initial") @@ -66,7 +64,6 @@ async def test_preserve_contexts_empty_generator(): context_var.reset(token) -@pytest.mark.asyncio async def test_preserve_contexts_across_event_loops(): """ Test that context variables are preserved across event loop boundaries with nested generators. diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index ef1dc9743..785077e91 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -6,7 +6,6 @@ import pytest -import pytest_asyncio from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose @@ -29,7 +28,7 @@ class MockUploadFile: return self.content -@pytest_asyncio.fixture +@pytest.fixture async def files_provider(tmp_path): """Create a files provider with temporary storage for testing.""" storage_dir = tmp_path / "files" @@ -68,7 +67,6 @@ def large_file(): class TestOpenAIFilesAPI: """Test suite for OpenAI Files API endpoints.""" - @pytest.mark.asyncio async def test_upload_file_success(self, files_provider, sample_text_file): """Test successful file upload.""" # Upload file @@ -82,7 +80,6 @@ class TestOpenAIFilesAPI: assert result.created_at > 0 assert result.expires_at > result.created_at - @pytest.mark.asyncio async def test_upload_different_purposes(self, files_provider, sample_text_file): """Test uploading files with different purposes.""" purposes = list(OpenAIFilePurpose) @@ -93,7 +90,6 @@ class TestOpenAIFilesAPI: uploaded_files.append(result) assert result.purpose == purpose - @pytest.mark.asyncio async def test_upload_different_file_types(self, files_provider, sample_text_file, sample_json_file, large_file): """Test uploading different types and sizes of files.""" files_to_test = [ @@ -107,7 +103,6 @@ class TestOpenAIFilesAPI: assert result.filename == expected_filename assert result.bytes == len(file_obj.content) - @pytest.mark.asyncio async def test_list_files_empty(self, files_provider): """Test listing files when no files exist.""" result = await files_provider.openai_list_files() @@ -117,7 +112,6 @@ class TestOpenAIFilesAPI: assert result.first_id == "" assert result.last_id == "" - @pytest.mark.asyncio async def test_list_files_with_content(self, files_provider, sample_text_file, sample_json_file): """Test listing files when files exist.""" # Upload multiple files @@ 
-132,7 +126,6 @@ class TestOpenAIFilesAPI: assert file1.id in file_ids assert file2.id in file_ids - @pytest.mark.asyncio async def test_list_files_with_purpose_filter(self, files_provider, sample_text_file): """Test listing files with purpose filtering.""" # Upload file with specific purpose @@ -146,7 +139,6 @@ class TestOpenAIFilesAPI: assert result.data[0].id == uploaded_file.id assert result.data[0].purpose == OpenAIFilePurpose.ASSISTANTS - @pytest.mark.asyncio async def test_list_files_with_limit(self, files_provider, sample_text_file): """Test listing files with limit parameter.""" # Upload multiple files @@ -157,7 +149,6 @@ class TestOpenAIFilesAPI: result = await files_provider.openai_list_files(limit=3) assert len(result.data) == 3 - @pytest.mark.asyncio async def test_list_files_with_order(self, files_provider, sample_text_file): """Test listing files with different order.""" # Upload multiple files @@ -178,7 +169,6 @@ class TestOpenAIFilesAPI: # Oldest should be first assert result_asc.data[0].created_at <= result_asc.data[1].created_at <= result_asc.data[2].created_at - @pytest.mark.asyncio async def test_retrieve_file_success(self, files_provider, sample_text_file): """Test successful file retrieval.""" # Upload file @@ -197,13 +187,11 @@ class TestOpenAIFilesAPI: assert retrieved_file.created_at == uploaded_file.created_at assert retrieved_file.expires_at == uploaded_file.expires_at - @pytest.mark.asyncio async def test_retrieve_file_not_found(self, files_provider): """Test retrieving a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_retrieve_file("file-nonexistent") - @pytest.mark.asyncio async def test_retrieve_file_content_success(self, files_provider, sample_text_file): """Test successful file content retrieval.""" # Upload file @@ -217,13 +205,11 @@ class TestOpenAIFilesAPI: # Verify content assert content.body == sample_text_file.content - @pytest.mark.asyncio async def test_retrieve_file_content_not_found(self, files_provider): """Test retrieving content of a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_retrieve_file_content("file-nonexistent") - @pytest.mark.asyncio async def test_delete_file_success(self, files_provider, sample_text_file): """Test successful file deletion.""" # Upload file @@ -245,13 +231,11 @@ class TestOpenAIFilesAPI: with pytest.raises(ValueError, match=f"File with id {uploaded_file.id} not found"): await files_provider.openai_retrieve_file(uploaded_file.id) - @pytest.mark.asyncio async def test_delete_file_not_found(self, files_provider): """Test deleting a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_delete_file("file-nonexistent") - @pytest.mark.asyncio async def test_file_persistence_across_operations(self, files_provider, sample_text_file): """Test that files persist correctly across multiple operations.""" # Upload file @@ -279,7 +263,6 @@ class TestOpenAIFilesAPI: files_list = await files_provider.openai_list_files() assert len(files_list.data) == 0 - @pytest.mark.asyncio async def test_multiple_files_operations(self, files_provider, sample_text_file, sample_json_file): """Test operations with multiple files.""" # Upload multiple files @@ -302,7 +285,6 @@ class TestOpenAIFilesAPI: content = await files_provider.openai_retrieve_file_content(file2.id) assert content.body == 
sample_json_file.content - @pytest.mark.asyncio async def test_file_id_uniqueness(self, files_provider, sample_text_file): """Test that each uploaded file gets a unique ID.""" file_ids = set() @@ -316,7 +298,6 @@ class TestOpenAIFilesAPI: file_ids.add(uploaded_file.id) assert uploaded_file.id.startswith("file-") - @pytest.mark.asyncio async def test_file_no_filename_handling(self, files_provider): """Test handling files with no filename.""" file_without_name = MockUploadFile(b"content", None) # No filename @@ -327,7 +308,6 @@ class TestOpenAIFilesAPI: assert uploaded_file.filename == "uploaded_file" # Default filename - @pytest.mark.asyncio async def test_after_pagination_works(self, files_provider, sample_text_file): """Test that 'after' pagination works correctly.""" # Upload multiple files to test pagination diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 4e50c5e08..7174d2e78 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest_asyncio +import pytest from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def sqlite_kvstore(tmp_path): db_path = tmp_path / "test_kv.db" kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix()) @@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path): yield kvstore -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def disk_dist_registry(sqlite_kvstore): registry = DiskDistributionRegistry(sqlite_kvstore) await registry.initialize() yield registry -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def cached_disk_dist_registry(sqlite_kvstore): registry = CachedDiskDistributionRegistry(sqlite_kvstore) await registry.initialize() diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index 7a7d52892..c06d9ab0e 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -8,7 +8,6 @@ from datetime import datetime from unittest.mock import AsyncMock import pytest -import pytest_asyncio from llama_stack.apis.agents import ( Agent, @@ -50,7 +49,7 @@ def config(tmp_path): ) -@pytest_asyncio.fixture +@pytest.fixture async def agents_impl(config, mock_apis): impl = MetaReferenceAgentsImpl( config, @@ -117,7 +116,6 @@ def sample_agent_config(): ) -@pytest.mark.asyncio async def test_create_agent(agents_impl, sample_agent_config): response = await agents_impl.create_agent(sample_agent_config) @@ -132,7 +130,6 @@ async def test_create_agent(agents_impl, sample_agent_config): assert isinstance(agent_info.created_at, datetime) -@pytest.mark.asyncio async def test_get_agent(agents_impl, sample_agent_config): create_response = await agents_impl.create_agent(sample_agent_config) agent_id = create_response.agent_id @@ -146,7 +143,6 @@ async def test_get_agent(agents_impl, sample_agent_config): assert isinstance(agent.created_at, datetime) -@pytest.mark.asyncio async def test_list_agents(agents_impl, sample_agent_config): agent1_response = await agents_impl.create_agent(sample_agent_config) 
agent2_response = await agents_impl.create_agent(sample_agent_config) @@ -160,7 +156,6 @@ async def test_list_agents(agents_impl, sample_agent_config): assert agent2_response.agent_id in agent_ids -@pytest.mark.asyncio @pytest.mark.parametrize("enable_session_persistence", [True, False]) async def test_create_agent_session_persistence(agents_impl, sample_agent_config, enable_session_persistence): # Create an agent with specified persistence setting @@ -188,7 +183,6 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config await agents_impl.get_agents_session(agent_id, session_response.session_id) -@pytest.mark.asyncio @pytest.mark.parametrize("enable_session_persistence", [True, False]) async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config, enable_session_persistence): # Create an agent with specified persistence setting @@ -221,7 +215,6 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config, assert session2.session_id in {s["session_id"] for s in sessions.data} -@pytest.mark.asyncio async def test_delete_agent(agents_impl, sample_agent_config): # Create an agent response = await agents_impl.create_agent(sample_agent_config) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 0d1ef8eca..6485e3512 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -122,7 +122,6 @@ async def fake_stream(fixture: str = "simple_chat_completion.yaml"): ) -@pytest.mark.asyncio async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a simple string input.""" # Setup @@ -155,7 +154,6 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m assert result.output[0].content[0].text == "Dublin" -@pytest.mark.asyncio async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a simple string input and tools.""" # Setup @@ -224,7 +222,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon assert result.output[1].content[0].annotations == [] -@pytest.mark.asyncio async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a tool call response that has a type of None.""" # Setup @@ -294,7 +291,6 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_ assert chunks[1].response.output[0].name == "get_weather" -@pytest.mark.asyncio async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with multiple messages.""" # Setup @@ -340,7 +336,6 @@ async def test_create_openai_response_with_multiple_messages(openai_responses_im assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam) -@pytest.mark.asyncio async def test_prepend_previous_response_none(openai_responses_impl): """Test prepending no previous response to a new response.""" @@ -348,7 +343,6 @@ async def test_prepend_previous_response_none(openai_responses_impl): assert input == "fake_input" -@pytest.mark.asyncio async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store): """Test prepending a basic previous 
response to a new response.""" @@ -388,7 +382,6 @@ async def test_prepend_previous_response_basic(openai_responses_impl, mock_respo assert input[2].content == "fake_input" -@pytest.mark.asyncio async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store): """Test prepending a web search previous response to a new response.""" input_item_message = OpenAIResponseMessage( @@ -434,7 +427,6 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_ assert input[3].content == "fake_input" -@pytest.mark.asyncio async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api): # Setup input_text = "What is the capital of Ireland?" @@ -463,7 +455,6 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m assert sent_messages[1].content == input_text -@pytest.mark.asyncio async def test_create_openai_response_with_instructions_and_multiple_messages( openai_responses_impl, mock_inference_api ): @@ -508,7 +499,6 @@ async def test_create_openai_response_with_instructions_and_multiple_messages( assert sent_messages[3].content == "Which is the largest?" -@pytest.mark.asyncio async def test_create_openai_response_with_instructions_and_previous_response( openai_responses_impl, mock_responses_store, mock_inference_api ): @@ -565,7 +555,6 @@ async def test_create_openai_response_with_instructions_and_previous_response( assert sent_messages[3].content == "Which is the largest?" -@pytest.mark.asyncio async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store): """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters.""" # Setup @@ -601,7 +590,6 @@ async def test_list_openai_response_input_items_delegation(openai_responses_impl assert result.data[0].id == "msg_123" -@pytest.mark.asyncio async def test_responses_store_list_input_items_logic(): """Test ResponsesStore list_response_input_items logic - mocks get_response_object to test actual ordering/limiting.""" @@ -680,7 +668,6 @@ async def test_responses_store_list_input_items_logic(): assert len(result.data) == 0 # Should return no items -@pytest.mark.asyncio async def test_store_response_uses_rehydrated_input_with_previous_response( openai_responses_impl, mock_responses_store, mock_inference_api ): @@ -747,7 +734,6 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( assert result.status == "completed" -@pytest.mark.asyncio @pytest.mark.parametrize( "text_format, response_format", [ @@ -787,7 +773,6 @@ async def test_create_openai_response_with_text_format( assert first_call.kwargs["response_format"] == response_format -@pytest.mark.asyncio async def test_create_openai_response_with_invalid_text_format(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with an invalid text format.""" # Setup diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py index 656d1e53c..26001fcf1 100644 --- a/tests/unit/providers/agents/test_persistence_access_control.py +++ b/tests/unit/providers/agents/test_persistence_access_control.py @@ -9,7 +9,6 @@ from datetime import datetime from unittest.mock import patch import pytest -import pytest_asyncio from llama_stack.apis.agents import Turn from llama_stack.apis.inference import CompletionMessage, StopReason @@ -17,13 +16,12 @@ from llama_stack.distribution.datatypes import User 
from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo -@pytest_asyncio.fixture +@pytest.fixture async def test_setup(sqlite_kvstore): agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={}) yield agent_persistence -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_session_creation_with_access_attributes(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -44,7 +42,6 @@ async def test_session_creation_with_access_attributes(mock_get_authenticated_us assert session_info.owner.attributes["teams"] == ["ai-team"] -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_session_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -79,7 +76,6 @@ async def test_session_access_control(mock_get_authenticated_user, test_setup): assert retrieved_session is None -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_turn_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -133,7 +129,6 @@ async def test_turn_access_control(mock_get_authenticated_user, test_setup): await agent_persistence.get_session_turns(session_id) -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_tool_call_and_infer_iters_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index eaa9b40da..ca44cc95d 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -14,7 +14,6 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest -import pytest_asyncio from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -103,7 +102,7 @@ def mock_openai_models_list(): yield mock_list -@pytest_asyncio.fixture(scope="module") +@pytest.fixture(scope="module") async def vllm_inference_adapter(): config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") inference_adapter = VLLMInferenceAdapter(config) @@ -112,7 +111,6 @@ async def vllm_inference_adapter(): return inference_adapter -@pytest.mark.asyncio async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter): async def mock_openai_models(): yield OpenAIModel(id="foo", created=1, object="model", owned_by="test") @@ -125,7 +123,6 @@ async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inferenc mock_openai_models_list.assert_called() -@pytest.mark.asyncio async def test_old_vllm_tool_choice(vllm_inference_adapter): """ Test that we set tool_choice to none when no tools are in use @@ -149,7 +146,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter): assert request.tool_config.tool_choice == ToolChoice.none -@pytest.mark.asyncio async def test_tool_call_response(vllm_inference_adapter): """Verify that tool call arguments from a CompletionMessage are correctly converted into the expected JSON format.""" @@ -192,7 +188,6 @@ async def test_tool_call_response(vllm_inference_adapter): ] -@pytest.mark.asyncio async def 
test_tool_call_delta_empty_tool_call_buf(): """ Test that we don't generate extra chunks when processing a @@ -222,7 +217,6 @@ async def test_tool_call_delta_empty_tool_call_buf(): assert chunks[1].event.stop_reason == StopReason.end_of_turn -@pytest.mark.asyncio async def test_tool_call_delta_streaming_arguments_dict(): async def mock_stream(): mock_chunk_1 = OpenAIChatCompletionChunk( @@ -297,7 +291,6 @@ async def test_tool_call_delta_streaming_arguments_dict(): assert chunks[2].event.event_type.value == "complete" -@pytest.mark.asyncio async def test_multiple_tool_calls(): async def mock_stream(): mock_chunk_1 = OpenAIChatCompletionChunk( @@ -376,7 +369,6 @@ async def test_multiple_tool_calls(): assert chunks[3].event.event_type.value == "complete" -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_no_choices(): """ Test that we don't error out when vLLM returns no choices for a @@ -453,7 +445,6 @@ def test_chat_completion_doesnt_block_event_loop(caplog): assert not asyncio_warnings -@pytest.mark.asyncio async def test_get_params_empty_tools(vllm_inference_adapter): request = ChatCompletionRequest( tools=[], @@ -464,7 +455,6 @@ async def test_get_params_empty_tools(vllm_inference_adapter): assert "tools" not in params -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk(): """ Tests the edge case where the model returns the arguments for the tool call in the same chunk that @@ -543,7 +533,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_ assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_no_finish_reason(): """ Tests the edge case where the model requests a tool call and stays idle without explicitly providing the @@ -596,7 +585,6 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason(): assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_tool_without_args(): """ Tests the edge case where no arguments are provided for the tool call. @@ -645,7 +633,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args(): assert chunks[-2].event.delta.tool_call.arguments == {} -@pytest.mark.asyncio async def test_health_status_success(vllm_inference_adapter): """ Test the health method of VLLM InferenceAdapter when the connection is successful. @@ -679,7 +666,6 @@ async def test_health_status_success(vllm_inference_adapter): mock_models.list.assert_called_once() -@pytest.mark.asyncio async def test_health_status_failure(vllm_inference_adapter): """ Test the health method of VLLM InferenceAdapter when the connection fails. diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 3598e4810..f57f6c9b3 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import pytest from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.inference import ( @@ -23,7 +22,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( ) -@pytest.mark.asyncio async def test_convert_message_to_openai_dict(): message = UserMessage(content=[TextContentItem(text="Hello, world!")], role="user") assert await convert_message_to_openai_dict(message) == { @@ -33,7 +31,6 @@ async def test_convert_message_to_openai_dict(): # Test convert_message_to_openai_dict with a tool call -@pytest.mark.asyncio async def test_convert_message_to_openai_dict_with_tool_call(): message = CompletionMessage( content="", @@ -54,7 +51,6 @@ async def test_convert_message_to_openai_dict_with_tool_call(): } -@pytest.mark.asyncio async def test_convert_message_to_openai_dict_with_builtin_tool_call(): message = CompletionMessage( content="", @@ -80,7 +76,6 @@ async def test_convert_message_to_openai_dict_with_builtin_tool_call(): } -@pytest.mark.asyncio async def test_openai_messages_to_messages_with_content_str(): openai_messages = [ OpenAISystemMessageParam(content="system message"), @@ -98,7 +93,6 @@ async def test_openai_messages_to_messages_with_content_str(): assert llama_messages[2].content == "assistant message" -@pytest.mark.asyncio async def test_openai_messages_to_messages_with_content_list(): openai_messages = [ OpenAISystemMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="system message")]), diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 220c21994..90b229262 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -13,7 +13,6 @@ from llama_stack.apis.tools import RAGDocument from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc -@pytest.mark.asyncio async def test_content_from_doc_with_url(): """Test extracting content from RAGDocument with URL content.""" mock_url = URL(uri="https://example.com") @@ -33,7 +32,6 @@ async def test_content_from_doc_with_url(): mock_instance.get.assert_called_once_with(mock_url.uri) -@pytest.mark.asyncio async def test_content_from_doc_with_pdf_url(): """Test extracting content from RAGDocument with URL pointing to a PDF.""" mock_url = URL(uri="https://example.com/document.pdf") @@ -58,7 +56,6 @@ async def test_content_from_doc_with_pdf_url(): mock_parse_pdf.assert_called_once_with(b"PDF binary data") -@pytest.mark.asyncio async def test_content_from_doc_with_data_url(): """Test extracting content from RAGDocument with data URL content.""" data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ=" # "Hello World" base64 encoded @@ -74,7 +71,6 @@ async def test_content_from_doc_with_data_url(): mock_content_from_data.assert_called_once_with(data_url) -@pytest.mark.asyncio async def test_content_from_doc_with_string(): """Test extracting content from RAGDocument with string content.""" content_string = "This is plain text content" @@ -85,7 +81,6 @@ async def test_content_from_doc_with_string(): assert result == content_string -@pytest.mark.asyncio async def test_content_from_doc_with_string_url(): """Test extracting content from RAGDocument with string URL content.""" url_string = "https://example.com" @@ -105,7 +100,6 @@ async def test_content_from_doc_with_string_url(): mock_instance.get.assert_called_once_with(url_string) -@pytest.mark.asyncio async def 
test_content_from_doc_with_string_pdf_url(): """Test extracting content from RAGDocument with string URL pointing to a PDF.""" url_string = "https://example.com/document.pdf" @@ -130,7 +124,6 @@ async def test_content_from_doc_with_string_pdf_url(): mock_parse_pdf.assert_called_once_with(b"PDF binary data") -@pytest.mark.asyncio async def test_content_from_doc_with_interleaved_content(): """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit).""" interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")] diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 10fa1e075..e11f95d49 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -87,18 +87,15 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: Prov return ModelRegistryHelper([known_provider_model, known_provider_model2]) -@pytest.mark.asyncio async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: assert helper.get_provider_model_id(unknown_model.model_id) is None -@pytest.mark.asyncio async def test_register_unknown_provider_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: with pytest.raises(ValueError): await helper.register_model(unknown_model) -@pytest.mark.asyncio async def test_register_model(helper: ModelRegistryHelper, known_model: Model) -> None: model = Model( provider_id=known_model.provider_id, @@ -110,7 +107,6 @@ async def test_register_model(helper: ModelRegistryHelper, known_model: Model) - assert helper.get_provider_model_id(model.model_id) == model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_from_alias(helper: ModelRegistryHelper, known_model: Model) -> None: model = Model( provider_id=known_model.provider_id, @@ -122,13 +118,11 @@ async def test_register_model_from_alias(helper: ModelRegistryHelper, known_mode assert helper.get_provider_model_id(model.model_id) == known_model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_existing(helper: ModelRegistryHelper, known_model: Model) -> None: await helper.register_model(known_model) assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_existing_different( helper: ModelRegistryHelper, known_model: Model, known_model2: Model ) -> None: @@ -137,7 +131,6 @@ async def test_register_model_existing_different( await helper.register_model(known_model) -@pytest.mark.asyncio async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) -> None: await helper.register_model(known_model) # duplicate entry assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_model_id @@ -145,18 +138,15 @@ async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) assert helper.get_provider_model_id(known_model.model_id) is None -@pytest.mark.asyncio async def test_unregister_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: with pytest.raises(ValueError): await helper.unregister_model(unknown_model.model_id) -@pytest.mark.asyncio async def test_register_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None: assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id -@pytest.mark.asyncio async def 
test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None: assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id await helper.unregister_model(known_model.provider_resource_id) diff --git a/tests/unit/providers/utils/test_scheduler.py b/tests/unit/providers/utils/test_scheduler.py index 25b4935de..e5ee74bfa 100644 --- a/tests/unit/providers/utils/test_scheduler.py +++ b/tests/unit/providers/utils/test_scheduler.py @@ -11,7 +11,6 @@ import pytest from llama_stack.providers.utils.scheduler import JobStatus, Scheduler -@pytest.mark.asyncio async def test_scheduler_unknown_backend(): with pytest.raises(ValueError): Scheduler(backend="unknown") @@ -26,7 +25,6 @@ async def wait_for_job_completed(sched: Scheduler, job_id: str) -> None: raise TimeoutError(f"Job {job_id} did not complete in time.") -@pytest.mark.asyncio async def test_scheduler_naive(): sched = Scheduler() @@ -87,7 +85,6 @@ async def test_scheduler_naive(): assert job.logs[0][0] < job.logs[1][0] -@pytest.mark.asyncio async def test_scheduler_naive_handler_raises(): sched = Scheduler() diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 8348b84e3..90108d7a0 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -import pytest_asyncio from llama_stack.apis.files import Files from llama_stack.apis.inference import EmbeddingsResponse, Inference @@ -91,13 +90,13 @@ def faiss_config(): return config -@pytest_asyncio.fixture +@pytest.fixture async def faiss_index(embedding_dimension): index = await FaissIndex.create(dimension=embedding_dimension) yield index -@pytest_asyncio.fixture +@pytest.fixture async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter: # Create the adapter adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api) @@ -113,7 +112,6 @@ async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> Fai yield adapter -@pytest.mark.asyncio async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical( faiss_index, sample_chunks, sample_embeddings, embedding_dimension ): @@ -136,7 +134,6 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_ assert response.chunks[1] == sample_chunks[1] -@pytest.mark.asyncio async def test_health_success(): """Test that the health check returns OK status when faiss is working correctly.""" # Create a fresh instance of FaissVectorIOAdapter for testing @@ -160,7 +157,6 @@ async def test_health_success(): mock_index_flat.assert_called_once_with(128) # VECTOR_DIMENSION is 128 -@pytest.mark.asyncio async def test_health_failure(): """Test that the health check returns ERROR status when faiss encounters an error.""" # Create a fresh instance of FaissVectorIOAdapter for testing diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 6902c8850..d3ffe711c 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -10,7 +10,6 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest -import pytest_asyncio from llama_stack.apis.inference import EmbeddingsResponse, Inference from 
llama_stack.apis.vector_io import ( @@ -68,7 +67,7 @@ def mock_api_service(sample_embeddings): return mock_api_service -@pytest_asyncio.fixture +@pytest.fixture async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter: adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service) adapter.vector_db_store = mock_vector_db_store @@ -80,7 +79,6 @@ async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, __QUERY = "Sample query" -@pytest.mark.asyncio @pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, @@ -111,7 +109,6 @@ def _prepare_for_json(value: Any) -> str: @patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json) -@pytest.mark.asyncio async def test_qdrant_register_and_unregister_vector_db( qdrant_adapter: QdrantVectorIOAdapter, mock_vector_db, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 8579c31bb..a61eeeeca 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,7 +8,6 @@ import asyncio import numpy as np import pytest -import pytest_asyncio from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( @@ -34,7 +33,7 @@ def loop(): return asyncio.new_event_loop() -@pytest_asyncio.fixture +@pytest.fixture async def sqlite_vec_index(embedding_dimension, tmp_path_factory): temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / "test_sqlite.db") @@ -43,14 +42,12 @@ async def sqlite_vec_index(embedding_dimension, tmp_path_factory): await index.delete() -@pytest.mark.asyncio async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata): await sqlite_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata) response = await sqlite_vec_index.query_vector(sample_embeddings_with_metadata[-1], k=2, score_threshold=0.0) assert response.chunks[0].chunk_metadata == sample_chunks_with_metadata[-1].chunk_metadata -@pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) query_string = "Sentence 5" @@ -68,7 +65,6 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa assert len(response_no_results.chunks) == 0, f"Expected 0 results, but got {len(response_no_results.chunks)}" -@pytest.mark.asyncio async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -90,7 +86,6 @@ async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embed assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_index, sample_chunks, sample_embeddings): # Re-initialize with a clean index await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -103,7 +98,6 @@ async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_i assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks), 
"Expected chunk not found" -@pytest.mark.asyncio async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension): """Test that chunk IDs do not conflict across batches when inserting chunks.""" # Reduce batch size to force multiple batches for same document @@ -134,7 +128,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -@pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -163,7 +156,6 @@ async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_c assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search with a high score threshold.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -185,7 +177,6 @@ async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chun assert len(response.chunks) == 0 -@pytest.mark.asyncio async def test_query_chunks_hybrid_different_embedding( sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension ): @@ -211,7 +202,6 @@ async def test_query_chunks_hybrid_different_embedding( assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, sample_embeddings): """Test that RRF properly combines rankings when documents appear in both search methods.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -236,7 +226,6 @@ async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -284,7 +273,6 @@ async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chun assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6) # Should behave like RRF -@pytest.mark.asyncio async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search with documents that appear in only one search method.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -313,7 +301,6 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks assert "document-2" in doc_ids # From keyword search -@pytest.mark.asyncio async def test_query_chunks_hybrid_weighted_reranker_parametrization( sqlite_vec_index, sample_chunks, sample_embeddings ): @@ -369,7 +356,6 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( ) -@pytest.mark.asyncio async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_chunks, sample_embeddings): """Test RRFReRanker with different impact factors.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -401,7 +387,6 @@ async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_ch assert response.scores[0] == pytest.approx(2.0 / 101.0, rel=1e-6) -@pytest.mark.asyncio 
async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -445,7 +430,6 @@ async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, s assert len(response.chunks) <= 100 -@pytest.mark.asyncio async def test_query_chunks_hybrid_tie_breaking( sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory ): diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 5f7926ce6..97e2f085e 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -25,12 +25,10 @@ from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREF # -v -s --tb=short --disable-warnings --asyncio-mode=auto -@pytest.mark.asyncio async def test_initialize_index(vector_index): await vector_index.initialize() -@pytest.mark.asyncio async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embeddings): vector_index.delete() vector_index.initialize() @@ -40,7 +38,6 @@ async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embed vector_index.delete() -@pytest.mark.asyncio async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimension): embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32) await vector_index.add_chunks(sample_chunks, embeddings) @@ -54,7 +51,6 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio assert len(contents) == len(set(contents)) -@pytest.mark.asyncio async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): key = f"{VECTOR_DBS_PREFIX}db1" dummy = VectorDB( @@ -65,7 +61,6 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): await vector_io_adapter.initialize() -@pytest.mark.asyncio async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.initialize() dummy = VectorDB( @@ -79,7 +74,6 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.shutdown() -@pytest.mark.asyncio async def test_register_and_unregister_vector_db(vector_io_adapter): unique_id = f"foo_db_{np.random.randint(1e6)}" dummy = VectorDB( @@ -92,14 +86,12 @@ async def test_register_and_unregister_vector_db(vector_io_adapter): assert dummy.identifier not in vector_io_adapter.cache -@pytest.mark.asyncio async def test_query_unregistered_raises(vector_io_adapter): fake_emb = np.zeros(8, dtype=np.float32) with pytest.raises(ValueError): await vector_io_adapter.query_chunks("no_such_db", fake_emb) -@pytest.mark.asyncio async def test_insert_chunks_calls_underlying_index(vector_io_adapter): fake_index = AsyncMock() vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index) @@ -110,7 +102,6 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter): fake_index.insert_chunks.assert_awaited_once_with(chunks) -@pytest.mark.asyncio async def test_insert_chunks_missing_db_raises(vector_io_adapter): vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) @@ -118,7 +109,6 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): await vector_io_adapter.insert_chunks("db_not_exist", []) -@pytest.mark.asyncio async def 
test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter): expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1]) fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected)) @@ -130,7 +120,6 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter assert response is expected -@pytest.mark.asyncio async def test_query_chunks_missing_db_raises(vector_io_adapter): vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) @@ -138,7 +127,6 @@ async def test_query_chunks_missing_db_raises(vector_io_adapter): await vector_io_adapter.query_chunks("db_missing", "q", None) -@pytest.mark.asyncio async def test_save_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -155,7 +143,6 @@ async def test_save_openai_vector_store(vector_io_adapter): assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store -@pytest.mark.asyncio async def test_update_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -172,7 +159,6 @@ async def test_update_openai_vector_store(vector_io_adapter): assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store -@pytest.mark.asyncio async def test_delete_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -188,7 +174,6 @@ async def test_delete_openai_vector_store(vector_io_adapter): assert openai_vector_store["id"] not in vector_io_adapter.openai_vector_stores -@pytest.mark.asyncio async def test_load_openai_vector_stores(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -204,7 +189,6 @@ async def test_load_openai_vector_stores(vector_io_adapter): assert loaded_stores[store_id] == openai_vector_store -@pytest.mark.asyncio async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -226,7 +210,6 @@ async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) -@pytest.mark.asyncio async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -260,7 +243,6 @@ async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_facto assert loaded_contents != file_info -@pytest.mark.asyncio async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -284,7 +266,6 @@ async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_pat assert loaded_contents == file_contents -@pytest.mark.asyncio async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index d2dd1783b..b2baa744a 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -17,13 +17,11 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti class TestRagQuery: - @pytest.mark.asyncio async def test_query_raises_on_empty_vector_db_ids(self): rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) - @pytest.mark.asyncio async def 
test_query_chunk_metadata_handling(self): rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) content = "test query content" diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 9d6b9ee67..dd36d3992 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -112,7 +112,6 @@ class TestValidateEmbedding: class TestVectorStore: - @pytest.mark.asyncio async def test_returns_content_from_pdf_data_uri(self): data_uri = data_url_from_file(DUMMY_PDF_PATH) doc = RAGDocument( @@ -124,7 +123,6 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES - @pytest.mark.asyncio async def test_downloads_pdf_and_returns_content(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" @@ -137,7 +135,6 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES - @pytest.mark.asyncio async def test_downloads_pdf_and_returns_content_with_url_object(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" @@ -204,7 +201,6 @@ class TestVectorStore: class TestVectorDBWithIndex: - @pytest.mark.asyncio async def test_insert_chunks_without_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model without embeddings" @@ -230,7 +226,6 @@ class TestVectorDBWithIndex: assert args[0] == chunks assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) - @pytest.mark.asyncio async def test_insert_chunks_with_valid_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model with embeddings" @@ -255,7 +250,6 @@ class TestVectorDBWithIndex: assert args[0] == chunks assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) - @pytest.mark.asyncio async def test_insert_chunks_with_invalid_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_dimension = 3 @@ -295,7 +289,6 @@ class TestVectorDBWithIndex: mock_inference_api.embeddings.assert_not_called() mock_index.add_chunks.assert_not_called() - @pytest.mark.asyncio async def test_insert_chunks_with_partially_precomputed_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model with partial embeddings" diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 909581bb7..87fe18d54 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -38,14 +38,12 @@ def sample_model(): ) -@pytest.mark.asyncio async def test_registry_initialization(disk_dist_registry): # Test empty registry result = await disk_dist_registry.get("nonexistent", "nonexistent") assert result is None -@pytest.mark.asyncio async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model): print(f"Registering {sample_vector_db}") await disk_dist_registry.register(sample_vector_db) @@ -64,7 +62,6 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m assert result_model.provider_id == sample_model.provider_id -@pytest.mark.asyncio async def test_cached_registry_initialization(sqlite_kvstore, 
sample_vector_db, sample_model): # First populate the disk registry disk_registry = DiskDistributionRegistry(sqlite_kvstore) @@ -85,7 +82,6 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, assert result_vector_db.provider_id == sample_vector_db.provider_id -@pytest.mark.asyncio async def test_cached_registry_updates(cached_disk_dist_registry): new_vector_db = VectorDB( identifier="test_vector_db_2", @@ -112,7 +108,6 @@ async def test_cached_registry_updates(cached_disk_dist_registry): assert result_vector_db.provider_id == new_vector_db.provider_id -@pytest.mark.asyncio async def test_duplicate_provider_registration(cached_disk_dist_registry): original_vector_db = VectorDB( identifier="test_vector_db_2", @@ -137,7 +132,6 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry): assert result.embedding_model == original_vector_db.embedding_model # Original values preserved -@pytest.mark.asyncio async def test_get_all_objects(cached_disk_dist_registry): # Create multiple test banks # Create multiple test banks @@ -170,7 +164,6 @@ async def test_get_all_objects(cached_disk_dist_registry): assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension -@pytest.mark.asyncio async def test_parse_registry_values_error_handling(sqlite_kvstore): valid_db = VectorDB( identifier="valid_vector_db", @@ -209,7 +202,6 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore): assert invalid_obj is None -@pytest.mark.asyncio async def test_cached_registry_error_handling(sqlite_kvstore): valid_db = VectorDB( identifier="valid_cached_db", diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 48b3ac51b..6cfb20944 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,14 +5,11 @@ # the root directory of this source tree. 
-import pytest - from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelWithOwner, User from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry -@pytest.mark.asyncio async def test_registry_cache_with_acl(cached_disk_dist_registry): model = ModelWithOwner( identifier="model-acl", @@ -48,7 +45,6 @@ async def test_registry_cache_with_acl(cached_disk_dist_registry): assert new_model.owner.attributes["teams"] == ["ai-team"] -@pytest.mark.asyncio async def test_registry_empty_acl(cached_disk_dist_registry): model = ModelWithOwner( identifier="model-empty-acl", @@ -85,7 +81,6 @@ async def test_registry_empty_acl(cached_disk_dist_registry): assert len(all_models) == 2 -@pytest.mark.asyncio async def test_registry_serialization(cached_disk_dist_registry): attributes = { "roles": ["admin", "researcher"], diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index af03ddacb..fb9c6f95e 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -7,7 +7,6 @@ from unittest.mock import MagicMock, Mock, patch import pytest -import pytest_asyncio import yaml from pydantic import TypeAdapter, ValidationError @@ -27,7 +26,7 @@ def _return_model(model): return model -@pytest_asyncio.fixture +@pytest.fixture async def test_setup(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -41,7 +40,6 @@ async def test_setup(cached_disk_dist_registry): yield cached_disk_dist_registry, routing_table -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_with_cache(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -106,7 +104,6 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await routing_table.get_model("model-admin") -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_and_updates(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -145,7 +142,6 @@ async def test_access_control_and_updates(mock_get_authenticated_user, test_setu assert model.identifier == "model-updates" -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_empty_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -170,7 +166,6 @@ async def test_access_control_empty_attributes(mock_get_authenticated_user, test assert "model-empty-attrs" in model_ids -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_no_user_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -201,7 +196,6 @@ async def test_no_user_attributes(mock_get_authenticated_user, test_setup): assert all_models.data[0].identifier == "model-public-2" -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_automatic_access_attributes(mock_get_authenticated_user, test_setup): """Test that newly created resources inherit access attributes from their creator.""" @@ -246,7 +240,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set assert model.identifier == "auto-access-model" -@pytest_asyncio.fixture +@pytest.fixture async def 
test_setup_with_access_policy(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -281,7 +275,6 @@ async def test_setup_with_access_policy(cached_disk_dist_registry): yield routing_table -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_policy(mock_get_authenticated_user, test_setup_with_access_policy): routing_table = test_setup_with_access_policy diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 39d6af1c8..7012a7f17 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -202,7 +202,6 @@ def test_http_auth_request_payload(http_client, valid_api_key, mock_auth_endpoin assert "param2" in payload["request"]["params"] -@pytest.mark.asyncio async def test_http_middleware_with_access_attributes(mock_http_middleware, mock_scope): """Test HTTP middleware behavior with access attributes""" middleware, mock_app = mock_http_middleware diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index acf4da0a3..a348590b1 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,7 +9,6 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock -import pytest from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference @@ -66,7 +65,6 @@ class SampleImpl: pass -@pytest.mark.asyncio async def test_resolve_impls_basic(): # Create a real provider spec provider_spec = InlineProviderSpec( diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index 60e9f4609..d42857186 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -7,13 +7,10 @@ import asyncio from unittest.mock import AsyncMock, MagicMock -import pytest - from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.distribution.server.server import create_dynamic_typed_route, create_sse_event, sse_generator -@pytest.mark.asyncio async def test_sse_generator_basic(): # An AsyncIterator wrapped in an Awaitable, just like our web methods async def async_event_gen(): @@ -35,7 +32,6 @@ async def test_sse_generator_basic(): assert seen_events[1] == create_sse_event("Test event 2") -@pytest.mark.asyncio async def test_sse_generator_client_disconnected(): # An AsyncIterator wrapped in an Awaitable, just like our web methods async def async_event_gen(): @@ -58,7 +54,6 @@ async def test_sse_generator_client_disconnected(): assert seen_events[0] == create_sse_event("Test event 1") -@pytest.mark.asyncio async def test_sse_generator_client_disconnected_before_response_starts(): # Disconnect before the response starts async def async_event_gen(): @@ -75,7 +70,6 @@ async def test_sse_generator_client_disconnected_before_response_starts(): assert len(seen_events) == 0 -@pytest.mark.asyncio async def test_sse_generator_error_before_response_starts(): # Raise an error before the response starts async def async_event_gen(): @@ -93,7 +87,6 @@ async def test_sse_generator_error_before_response_starts(): assert 'data: {"error":' in seen_events[0] -@pytest.mark.asyncio async def test_paginated_response_url_setting(): """Test that PaginatedResponse gets url set to route path.""" diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index de619c760..730f54a05 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ 
b/tests/unit/utils/inference/test_inference_store.py @@ -42,7 +42,6 @@ def create_test_chat_completion( ) -@pytest.mark.asyncio async def test_inference_store_pagination_basic(): """Test basic pagination functionality.""" with TemporaryDirectory() as tmp_dir: @@ -88,7 +87,6 @@ async def test_inference_store_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_inference_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -123,7 +121,6 @@ async def test_inference_store_pagination_ascending(): assert result2.has_more is True -@pytest.mark.asyncio async def test_inference_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: @@ -161,7 +158,6 @@ async def test_inference_store_pagination_with_model_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_inference_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: @@ -174,7 +170,6 @@ async def test_inference_store_pagination_invalid_after(): await store.list_chat_completions(after="non-existent", limit=2) -@pytest.mark.asyncio async def test_inference_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 3f25e2524..44d4b30da 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -44,7 +44,6 @@ def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInp ) -@pytest.mark.asyncio async def test_responses_store_pagination_basic(): """Test basic pagination functionality for responses store.""" with TemporaryDirectory() as tmp_dir: @@ -90,7 +89,6 @@ async def test_responses_store_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_responses_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -125,7 +123,6 @@ async def test_responses_store_pagination_ascending(): assert result2.has_more is True -@pytest.mark.asyncio async def test_responses_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: @@ -163,7 +160,6 @@ async def test_responses_store_pagination_with_model_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_responses_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: @@ -176,7 +172,6 @@ async def test_responses_store_pagination_invalid_after(): await store.list_responses(after="non-existent", limit=2) -@pytest.mark.asyncio async def test_responses_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: @@ -205,7 +200,6 @@ async def test_responses_store_pagination_no_limit(): assert result.has_more is False -@pytest.mark.asyncio async def test_responses_store_get_response_object(): """Test retrieving a single response object.""" with TemporaryDirectory() as tmp_dir: @@ -230,7 +224,6 @@ async def test_responses_store_get_response_object(): await store.get_response_object("non-existent") -@pytest.mark.asyncio async def 
test_responses_store_input_items_pagination(): """Test pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: @@ -308,7 +301,6 @@ async def test_responses_store_input_items_pagination(): await store.list_response_input_items("test-resp", before="some-id", after="other-id") -@pytest.mark.asyncio async def test_responses_store_input_items_before_pagination(): """Test before pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py index c4230a396..778f0b658 100644 --- a/tests/unit/utils/sqlstore/test_sqlstore.py +++ b/tests/unit/utils/sqlstore/test_sqlstore.py @@ -14,7 +14,6 @@ from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -@pytest.mark.asyncio async def test_sqlite_sqlstore(): with TemporaryDirectory() as tmp_dir: db_name = "test.db" @@ -66,7 +65,6 @@ async def test_sqlite_sqlstore(): assert result.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_basic(): """Test basic pagination functionality at the SQL store level.""" with TemporaryDirectory() as tmp_dir: @@ -131,7 +129,6 @@ async def test_sqlstore_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_with_filter(): """Test pagination with WHERE conditions.""" with TemporaryDirectory() as tmp_dir: @@ -184,7 +181,6 @@ async def test_sqlstore_pagination_with_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_ascending_order(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -233,7 +229,6 @@ async def test_sqlstore_pagination_ascending_order(): assert result2.has_more is True -@pytest.mark.asyncio async def test_sqlstore_pagination_multi_column_ordering_error(): """Test that multi-column ordering raises an error when using cursor pagination.""" with TemporaryDirectory() as tmp_dir: @@ -271,7 +266,6 @@ async def test_sqlstore_pagination_multi_column_ordering_error(): assert result.data[0]["id"] == "task1" -@pytest.mark.asyncio async def test_sqlstore_pagination_cursor_requires_order_by(): """Test that cursor pagination requires order_by parameter.""" with TemporaryDirectory() as tmp_dir: @@ -289,7 +283,6 @@ async def test_sqlstore_pagination_cursor_requires_order_by(): ) -@pytest.mark.asyncio async def test_sqlstore_pagination_error_handling(): """Test error handling for invalid columns and cursor IDs.""" with TemporaryDirectory() as tmp_dir: @@ -339,7 +332,6 @@ async def test_sqlstore_pagination_error_handling(): ) -@pytest.mark.asyncio async def test_sqlstore_pagination_custom_key_column(): """Test pagination with custom primary key column (not 'id').""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py index 61763719a..066f67a98 100644 --- a/tests/unit/utils/test_authorized_sqlstore.py +++ b/tests/unit/utils/test_authorized_sqlstore.py @@ -7,8 +7,6 @@ from tempfile import TemporaryDirectory from unittest.mock import patch -import pytest - from llama_stack.distribution.access_control.access_control import default_policy, is_action_allowed from llama_stack.distribution.access_control.datatypes import Action from llama_stack.distribution.datatypes import User @@ -18,7 +16,6 @@ from 
llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_fetch_with_where_sql_access_control(mock_get_authenticated_user): """Test that fetch_all works correctly with where_sql for access control""" @@ -81,7 +78,6 @@ async def test_authorized_fetch_with_where_sql_access_control(mock_get_authentic assert row["title"] == "User Document" -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_sql_policy_consistency(mock_get_authenticated_user): """Test that SQL WHERE clause logic exactly matches is_action_allowed policy logic""" @@ -168,7 +164,6 @@ async def test_sql_policy_consistency(mock_get_authenticated_user): ) -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_store_user_attribute_capture(mock_get_authenticated_user): """Test that user attributes are properly captured during insert""" diff --git a/uv.lock b/uv.lock index 8374fe38a..fe50f88aa 100644 --- a/uv.lock +++ b/uv.lock @@ -1394,8 +1394,8 @@ dev = [ { name = "black" }, { name = "nbval" }, { name = "pre-commit" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, + { name = "pytest", specifier = ">=8.4" }, + { name = "pytest-asyncio", specifier = ">=1.0" }, { name = "pytest-cov" }, { name = "pytest-html" }, { name = "pytest-json-report" }, @@ -2432,29 +2432,30 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.4" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, + { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919, upload-time = "2024-12-01T12:54:25.98Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083, upload-time = "2024-12-01T12:54:19.735Z" }, + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] [[package]] name = "pytest-asyncio" -version = "0.25.3" +version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239, upload-time = 
"2025-01-28T18:37:58.729Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960, upload-time = "2025-05-26T04:54:40.484Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467, upload-time = "2025-01-28T18:37:56.798Z" }, + { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976, upload-time = "2025-05-26T04:54:39.035Z" }, ] [[package]] From aa2595c7c3a4145951acdf8c7bc5247c72ded353 Mon Sep 17 00:00:00 2001 From: Jorge Piedrahita Ortiz Date: Fri, 11 Jul 2025 15:29:15 -0500 Subject: [PATCH 4/8] fix: sambanova shields and model validation (#2693) # What does this PR do? Update the shield register validation of Sambanova not to raise, but only warn when a model is not available in the base url endpoint used, also added warnings when model is not available in the base url endpoint used ## Test Plan run starter distro with Sambanova enabled --- .../remote/inference/sambanova/sambanova.py | 21 ++++++++++++++++++- .../remote/safety/sambanova/sambanova.py | 21 ++++++++++--------- .../inference/test_openai_completion.py | 1 - 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index 20f863665..9c2dda889 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -7,6 +7,7 @@ import json from collections.abc import Iterable +import requests from openai.types.chat import ( ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage, ) @@ -56,6 +57,7 @@ from llama_stack.apis.inference import ( ToolResponseMessage, UserMessage, ) +from llama_stack.apis.models import Model from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import BuiltinTool from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin @@ -176,10 +178,11 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): def __init__(self, config: SambaNovaImplConfig): self.config = config + self.environment_available_models = [] LiteLLMOpenAIMixin.__init__( self, model_entries=MODEL_ENTRIES, - api_key_from_config=self.config.api_key, + api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None, provider_data_api_key_field="sambanova_api_key", ) @@ -246,6 +249,22 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): **get_sampling_options(request.sampling_params), } + async def register_model(self, model: Model) -> Model: + model_id = self.get_provider_model_id(model.provider_resource_id) + + list_models_url = self.config.url + "/models" + if len(self.environment_available_models) == 0: + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + self.environment_available_models = 
[model.get("id") for model in response.json().get("data", {})] + + if model_id.split("sambanova/")[-1] not in self.environment_available_models: + logger.warning(f"Model {model_id} not available in {list_models_url}") + return model + async def initialize(self): await super().initialize() diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py index 84c8267ae..1a65f6aa1 100644 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -33,6 +33,7 @@ CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData): def __init__(self, config: SambaNovaSafetyConfig) -> None: self.config = config + self.environment_available_models = [] async def initialize(self) -> None: pass @@ -54,18 +55,18 @@ class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProvide async def register_shield(self, shield: Shield) -> None: list_models_url = self.config.url + "/models" - try: - response = requests.get(list_models_url) - response.raise_for_status() - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Request to {list_models_url} failed") from e - available_models = [model.get("id") for model in response.json().get("data", {})] + if len(self.environment_available_models) == 0: + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + self.environment_available_models = [model.get("id") for model in response.json().get("data", {})] if ( - len(available_models) == 0 - or "guard" not in shield.provider_resource_id.lower() - or shield.provider_resource_id.split("sambanova/")[-1] not in available_models + "guard" not in shield.provider_resource_id.lower() + or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models ): - raise ValueError(f"Shield {shield.provider_resource_id} not found in SambaNova") + logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}") async def run_shield( self, shield_id: str, messages: list[Message], params: dict[str, Any] | None = None diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 05aee5096..e82714ffd 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -71,7 +71,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode "remote::cerebras", "remote::databricks", "remote::runpod", - "remote::sambanova", "remote::tgi", ): pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.") From 51d9fd48083de32e9796f00030b57be10a8ac7bd Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Fri, 11 Jul 2025 16:38:27 -0400 Subject: [PATCH 5/8] fix: Don't cache clients for passthrough auth providers (#2728) # What does this PR do? Some of our inference providers support passthrough authentication via `x-llamastack-provider-data` header values. 
This fixes the providers that support passthrough auth to not cache their clients to the backend providers (mostly OpenAI client instances) so that the client connecting to Llama Stack has to provide those auth values on each and every request. ## Test Plan I added some unit tests to ensure we're not caching clients across requests for all the fixed providers in this PR. ``` uv run pytest -sv tests/unit/providers/inference/test_inference_client_caching.py ``` I also ran some of our OpenAI compatible API integration tests for each of the changed providers, just to ensure they still work. Note that these providers don't actually pass all these tests (for unrelated reasons due to quirks of the Groq and Together SaaS services), but enough of the tests passed to confirm the clients are still working as intended. ### Together ``` ENABLE_TOGETHER="together" \ uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv \ tests/integration/inference/test_openai_completion.py \ --text-model "together/meta-llama/Llama-3.1-8B-Instruct" ``` ### OpenAI ``` ENABLE_OPENAI="openai" \ uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv \ tests/integration/inference/test_openai_completion.py \ --text-model "openai/gpt-4o-mini" ``` ### Groq ``` ENABLE_GROQ="groq" \ uv run llama stack run llama_stack/templates/starter/run.yaml LLAMA_STACK_CONFIG=http://localhost:8321 \ uv run pytest -sv \ tests/integration/inference/test_openai_completion.py \ --text-model "groq/meta-llama/Llama-3.1-8B-Instruct" ``` --------- Signed-off-by: Ben Browning --- .../providers/remote/inference/groq/groq.py | 14 +-- .../remote/inference/openai/openai.py | 14 +-- .../remote/inference/together/together.py | 47 ++++------ pyproject.toml | 2 + .../test_inference_client_caching.py | 73 +++++++++++++++ uv.lock | 91 +++++++++++++++++++ 6 files changed, 196 insertions(+), 45 deletions(-) create mode 100644 tests/unit/providers/inference/test_inference_client_caching.py diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 4b295e788..91c6b6c17 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -38,24 +38,18 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin): provider_data_api_key_field="groq_api_key", ) self.config = config - self._openai_client = None async def initialize(self): await super().initialize() async def shutdown(self): await super().shutdown() - if self._openai_client: - await self._openai_client.close() - self._openai_client = None def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - self._openai_client = AsyncOpenAI( - base_url=f"{self.config.url}/openai/v1", - api_key=self.config.api_key, - ) - return self._openai_client + return AsyncOpenAI( + base_url=f"{self.config.url}/openai/v1", + api_key=self.get_api_key(), + ) async def openai_chat_completion( self, diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 72428422f..818883919 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -59,9 +59,6 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): # if we do not set this, users will be exposed to the # litellm specific model names, an abstraction leak. 
self.is_openai_compat = True - self._openai_client = AsyncOpenAI( - api_key=self.config.api_key, - ) async def initialize(self) -> None: await super().initialize() @@ -69,6 +66,11 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): async def shutdown(self) -> None: await super().shutdown() + def _get_openai_client(self) -> AsyncOpenAI: + return AsyncOpenAI( + api_key=self.get_api_key(), + ) + async def openai_completion( self, model: str, @@ -120,7 +122,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): user=user, suffix=suffix, ) - return await self._openai_client.completions.create(**params) + return await self._get_openai_client().completions.create(**params) async def openai_chat_completion( self, @@ -176,7 +178,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): top_p=top_p, user=user, ) - return await self._openai_client.chat.completions.create(**params) + return await self._get_openai_client().chat.completions.create(**params) async def openai_embeddings( self, @@ -204,7 +206,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): params["user"] = user # Call OpenAI embeddings API - response = await self._openai_client.embeddings.create(**params) + response = await self._get_openai_client().embeddings.create(**params) data = [] for i, embedding_data in enumerate(response.data): diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 9e6877b7c..e1eb934c5 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -68,19 +68,12 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi def __init__(self, config: TogetherImplConfig) -> None: ModelRegistryHelper.__init__(self, MODEL_ENTRIES) self.config = config - self._client = None - self._openai_client = None async def initialize(self) -> None: pass async def shutdown(self) -> None: - if self._client: - # Together client has no close method, so just set to None - self._client = None - if self._openai_client: - await self._openai_client.close() - self._openai_client = None + pass async def completion( self, @@ -108,29 +101,25 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi return await self._nonstream_completion(request) def _get_client(self) -> AsyncTogether: - if not self._client: - together_api_key = None - config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None - if config_api_key: - together_api_key = config_api_key - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.together_api_key: - raise ValueError( - 'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": }' - ) - together_api_key = provider_data.together_api_key - self._client = AsyncTogether(api_key=together_api_key) - return self._client + together_api_key = None + config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None + if config_api_key: + together_api_key = config_api_key + else: + provider_data = self.get_request_provider_data() + if provider_data is None or not provider_data.together_api_key: + raise ValueError( + 'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": }' + ) + together_api_key = provider_data.together_api_key + return AsyncTogether(api_key=together_api_key) def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - 
together_client = self._get_client().client - self._openai_client = AsyncOpenAI( - base_url=together_client.base_url, - api_key=together_client.api_key, - ) - return self._openai_client + together_client = self._get_client().client + return AsyncOpenAI( + base_url=together_client.base_url, + api_key=together_client.api_key, + ) async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse: params = await self._get_params(request) diff --git a/pyproject.toml b/pyproject.toml index f4115d028..9977d7372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,8 @@ unit = [ "blobfile", "faiss-cpu", "pymilvus>=2.5.12", + "litellm", + "together", ] # These are the core dependencies required for running integration tests. They are shared across all # providers. If a provider requires additional dependencies, please add them to your environment diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py new file mode 100644 index 000000000..c9a931d47 --- /dev/null +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -0,0 +1,73 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from unittest.mock import MagicMock + +from llama_stack.distribution.request_headers import request_provider_data_context +from llama_stack.providers.remote.inference.groq.config import GroqConfig +from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter +from llama_stack.providers.remote.inference.openai.config import OpenAIConfig +from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter +from llama_stack.providers.remote.inference.together.config import TogetherImplConfig +from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter + + +def test_groq_provider_openai_client_caching(): + """Ensure the Groq provider does not cache api keys across client requests""" + + config = GroqConfig() + inference_adapter = GroqInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key + + +def test_openai_provider_openai_client_caching(): + """Ensure the OpenAI provider does not cache api keys across client requests""" + + config = OpenAIConfig() + inference_adapter = OpenAIInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key + + +def test_together_provider_openai_client_caching(): + """Ensure the Together provider does not cache api keys 
across client requests""" + + config = TogetherImplConfig() + inference_adapter = TogetherInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.together.TogetherProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context({"x-llamastack-provider-data": json.dumps({"together_api_key": api_key})}): + together_client = inference_adapter._get_client() + assert together_client.client.api_key == api_key + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key diff --git a/uv.lock b/uv.lock index fe50f88aa..bca12fc51 100644 --- a/uv.lock +++ b/uv.lock @@ -615,6 +615,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/a3/460c57f094a4a165c84a1341c373b0a4f5ec6ac244b998d5021aade89b77/ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3", size = 150607, upload-time = "2025-03-13T11:52:41.757Z" }, ] +[[package]] +name = "eval-type-backport" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079, upload-time = "2024-12-21T20:09:46.005Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" }, +] + [[package]] name = "executing" version = "2.2.0" @@ -1238,6 +1247,28 @@ version = "1.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749, upload-time = "2009-11-12T21:42:00.934Z" } +[[package]] +name = "litellm" +version = "1.74.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/10/63cdae1b1d581ad1db51153dfd06c4e18394a3ba8de495f73f2d797ece3b/litellm-1.74.2.tar.gz", hash = "sha256:cbacffe93976c60ca674fec0a942c70b900b4ad1c8069395174049a162f255bf", size = 9230641, upload-time = "2025-07-11T03:31:07.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/f7/67689245f48b9e79bcd2f3a10a3690cb1918fb99fffd5a623ed2496bca66/litellm-1.74.2-py3-none-any.whl", hash = "sha256:29bb555b45128e4cc696e72921a6ec24e97b14e9b69e86eed6f155124ad629b1", size = 8587065, upload-time = "2025-07-11T03:31:05.598Z" }, +] + [[package]] name = "llama-stack" version = "0.2.14" @@ -1341,6 +1372,7 @@ unit = [ { name = "blobfile" }, { name = "chardet" }, { name = "faiss-cpu" }, + { name = "litellm" }, { name = "mcp" }, { name = "openai" }, { name = "pymilvus" }, @@ -1348,6 +1380,7 @@ unit = [ { name = "qdrant-client" }, { name 
= "sqlalchemy", extra = ["asyncio"] }, { name = "sqlite-vec" }, + { name = "together" }, ] [package.metadata] @@ -1446,6 +1479,7 @@ unit = [ { name = "blobfile" }, { name = "chardet" }, { name = "faiss-cpu" }, + { name = "litellm" }, { name = "mcp" }, { name = "openai" }, { name = "pymilvus", specifier = ">=2.5.12" }, @@ -1454,6 +1488,7 @@ unit = [ { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "sqlite-vec" }, + { name = "together" }, ] [[package]] @@ -2952,6 +2987,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/29/93c53c098d301132196c3238c312825324740851d77a8500a2462c0fd888/setuptools-80.8.0-py3-none-any.whl", hash = "sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0", size = 1201470, upload-time = "2025-05-20T14:02:51.348Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -3384,6 +3428,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177, upload-time = "2024-07-19T09:26:48.863Z" }, ] +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -3426,6 +3479,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" }, ] +[[package]] +name = "together" +version = "1.5.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "eval-type-backport" }, + { name = "filelock" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "rich" }, + { name = "tabulate" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/ea/53/e33c5e6d53c2e2bbd07f9dcb1979e27ac670fca0e4e238b169aa4c358ee2/together-1.5.21.tar.gz", hash = "sha256:59adb8cf4c5b77eca76b8c66a73c47c45fd828aaf4f059f33f893f8c5f68f85a", size = 69887, upload-time = "2025-07-10T21:04:43.781Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/31/6556a303ea39929fa016f4260eef289b620cf366a576c304507cb75b4d12/together-1.5.21-py3-none-any.whl", hash = "sha256:35e6c0072033a2e5f1105de8781e969f41cffc85dae508b6f4dc293360026872", size = 96141, upload-time = "2025-07-10T21:04:42.418Z" }, +] + [[package]] name = "tokenizers" version = "0.21.1" @@ -3644,6 +3720,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/22/733a6fc4a6445d835242f64c490fdd30f4a08d58f2b788613de3f9170692/transformers-4.50.3-py3-none-any.whl", hash = "sha256:6111610a43dec24ef32c3df0632c6b25b07d9711c01d9e1077bdd2ff6b14a38c", size = 10180411, upload-time = "2025-03-28T18:20:59.265Z" }, ] +[[package]] +name = "typer" +version = "0.15.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559, upload-time = "2025-05-14T16:34:57.704Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258, upload-time = "2025-05-14T16:34:55.583Z" }, +] + [[package]] name = "types-requests" version = "2.32.0.20241016" From 8374d4cefd3e59c4dad68ec2842622b5f4154fd5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 12 Jul 2025 16:23:42 -0400 Subject: [PATCH 6/8] chore(github-deps): bump medyagh/setup-minikube from 0.0.19 to 0.0.20 (#2738) --- .github/workflows/integration-auth-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 7822e4216..cf10e005c 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -35,7 +35,7 @@ jobs: - name: Install minikube if: ${{ matrix.auth-provider == 'kubernetes' }} - uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19 + uses: medyagh/setup-minikube@e3c7f79eb1e997eabccc536a6cf318a2b0fe19d9 # v0.0.20 - name: Start minikube if: ${{ matrix.auth-provider == 'oauth2_token' }} From 68e7978c8890fd0aec901e0871fb92061a0d8fa5 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Sat, 12 Jul 2025 19:53:54 -0400 Subject: [PATCH 7/8] chore: block network access from unit tests (#2732) # What does this PR do? this blocks network access for all `tests/unit/` tests. `tests/integration/` are untouched. it also introduces an `allow_network` marker to explicitly allow network access. 
## Test Plan `./scripts/unit-tests.sh` --- pyproject.toml | 4 ++++ tests/unit/conftest.py | 11 +++++++++++ tests/unit/providers/inference/test_remote_vllm.py | 1 + tests/unit/rag/test_vector_store.py | 2 ++ uv.lock | 14 ++++++++++++++ 5 files changed, 32 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9977d7372..2974ff996 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dev = [ "pytest-cov", "pytest-html", "pytest-json-report", + "pytest-socket", # For blocking network access in unit tests "nbval", # For notebook testing "black", "ruff", @@ -344,3 +345,6 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"] [tool.pytest.ini_options] asyncio_mode = "auto" +markers = [ + "allow_network: Allow network access for specific unit tests", +] diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index aedac0386..b5eb1217d 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -4,6 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import pytest_socket + # We need to import the fixtures here so that pytest can find them # but ruff doesn't think they are used and removes the import. "noqa: F401" prevents them from being removed from .fixtures import cached_disk_dist_registry, disk_dist_registry, sqlite_kvstore # noqa: F401 + + +def pytest_runtest_setup(item): + """Setup for each test - check if network access should be allowed.""" + if "allow_network" in item.keywords: + pytest_socket.enable_socket() + else: + # Allowing Unix sockets is necessary for some tests that use local servers and mocks + pytest_socket.disable_socket(allow_unix_socket=True) diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index ca44cc95d..5c2ad03ab 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -393,6 +393,7 @@ async def test_process_vllm_chat_completion_stream_response_no_choices(): assert chunks[0].event.event_type.value == "start" +@pytest.mark.allow_network def test_chat_completion_doesnt_block_event_loop(caplog): loop = asyncio.new_event_loop() loop.set_debug(True) diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index dd36d3992..919f97ba7 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -123,6 +123,7 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES + @pytest.mark.allow_network async def test_downloads_pdf_and_returns_content(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" @@ -135,6 +136,7 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES + @pytest.mark.allow_network async def test_downloads_pdf_and_returns_content_with_url_object(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" diff --git a/uv.lock b/uv.lock index bca12fc51..83e502e7f 100644 --- a/uv.lock +++ b/uv.lock @@ -1324,6 +1324,7 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-html" }, { name = "pytest-json-report" }, + { name = "pytest-socket" }, { name = 
"pytest-timeout" }, { name = "ruamel-yaml" }, { name = "ruff" }, @@ -1432,6 +1433,7 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-html" }, { name = "pytest-json-report" }, + { name = "pytest-socket" }, { name = "pytest-timeout" }, { name = "ruamel-yaml" }, { name = "ruff" }, @@ -2545,6 +2547,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl", hash = "sha256:c8e0844db684ee1c798cfa38908d20d67d0463ecb6137c72e91f418558dd5f4b", size = 11428, upload-time = "2024-02-12T19:38:42.531Z" }, ] +[[package]] +name = "pytest-socket" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/ff/90c7e1e746baf3d62ce864c479fd53410b534818b9437413903596f81580/pytest_socket-0.7.0.tar.gz", hash = "sha256:71ab048cbbcb085c15a4423b73b619a8b35d6a307f46f78ea46be51b1b7e11b3", size = 12389, upload-time = "2024-01-28T20:17:23.177Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl", hash = "sha256:7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45", size = 6754, upload-time = "2024-01-28T20:17:22.105Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0" From 958fc92b1bc99ba8e57e0819696f74a7e09f45f0 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Sun, 13 Jul 2025 04:03:55 -0400 Subject: [PATCH 8/8] feat: Add Vector stores UI (#2737) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - Adds two pages to UI - Vector stores - Vector store detail view - Fixed darkmode navbar highlighting - Updated darkmode font color - Updated llama-stack-client package Screenshot 2025-07-12 at 11 34
35 PM (screenshot)
Screenshot 2025-07-12 at 11 57
09 PM ## Test Plan --------- Signed-off-by: Francisco Javier Arceo --- .../ui/app/logs/vector-stores/[id]/page.tsx | 82 +++ .../ui/app/logs/vector-stores/layout.tsx | 16 + .../ui/app/logs/vector-stores/page.tsx | 121 +++++ .../ui/components/layout/app-sidebar.tsx | 16 +- .../ui/components/layout/detail-layout.tsx | 8 +- .../ui/components/ui/message-components.tsx | 4 +- .../vector-stores/vector-store-detail.tsx | 128 +++++ llama_stack/ui/package-lock.json | 474 +----------------- llama_stack/ui/package.json | 2 +- 9 files changed, 378 insertions(+), 473 deletions(-) create mode 100644 llama_stack/ui/app/logs/vector-stores/[id]/page.tsx create mode 100644 llama_stack/ui/app/logs/vector-stores/layout.tsx create mode 100644 llama_stack/ui/app/logs/vector-stores/page.tsx create mode 100644 llama_stack/ui/components/vector-stores/vector-store-detail.tsx diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx new file mode 100644 index 000000000..f27c9d802 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx @@ -0,0 +1,82 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { useParams, useRouter } from "next/navigation"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; +import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; +import { VectorStoreDetailView } from "@/components/vector-stores/vector-store-detail"; + +export default function VectorStoreDetailPage() { + const params = useParams(); + const id = params.id as string; + const client = useAuthClient(); + const router = useRouter(); + + const [store, setStore] = useState(null); + const [files, setFiles] = useState([]); + const [isLoadingStore, setIsLoadingStore] = useState(true); + const [isLoadingFiles, setIsLoadingFiles] = useState(true); + const [errorStore, setErrorStore] = useState(null); + const [errorFiles, setErrorFiles] = useState(null); + + useEffect(() => { + if (!id) { + setErrorStore(new Error("Vector Store ID is missing.")); + setIsLoadingStore(false); + return; + } + const fetchStore = async () => { + setIsLoadingStore(true); + setErrorStore(null); + try { + const response = await client.vectorStores.retrieve(id); + setStore(response as VectorStore); + } catch (err) { + setErrorStore( + err instanceof Error + ? err + : new Error("Failed to load vector store."), + ); + } finally { + setIsLoadingStore(false); + } + }; + fetchStore(); + }, [id, client]); + + useEffect(() => { + if (!id) { + setErrorFiles(new Error("Vector Store ID is missing.")); + setIsLoadingFiles(false); + return; + } + const fetchFiles = async () => { + setIsLoadingFiles(true); + setErrorFiles(null); + try { + const result = await client.vectorStores.files.list(id as any); + setFiles((result as any).data); + } catch (err) { + setErrorFiles( + err instanceof Error ? 
err : new Error("Failed to load files."), + ); + } finally { + setIsLoadingFiles(false); + } + }; + fetchFiles(); + }, [id]); + + return ( + + ); +} diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/llama_stack/ui/app/logs/vector-stores/layout.tsx new file mode 100644 index 000000000..9245f5486 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/layout.tsx @@ -0,0 +1,16 @@ +"use client"; + +import React from "react"; +import LogsLayout from "@/components/layout/logs-layout"; + +export default function VectorStoresLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + {children} + + ); +} diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/llama_stack/ui/app/logs/vector-stores/page.tsx new file mode 100644 index 000000000..29e1fabd6 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/page.tsx @@ -0,0 +1,121 @@ +"use client"; + +import React from "react"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { + ListVectorStoresResponse, + VectorStore, +} from "llama-stack-client/resources/vector-stores/vector-stores"; +import { useRouter } from "next/navigation"; +import { usePagination } from "@/hooks/use-pagination"; +import { + Table, + TableBody, + TableCaption, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { Skeleton } from "@/components/ui/skeleton"; + +export default function VectorStoresPage() { + const client = useAuthClient(); + const router = useRouter(); + const { + data: stores, + status, + hasMore, + error, + loadMore, + } = usePagination({ + limit: 20, + order: "desc", + fetchFunction: async (client, params) => { + const response = await client.vectorStores.list({ + after: params.after, + limit: params.limit, + order: params.order, + } as any); + return response as ListVectorStoresResponse; + }, + errorMessagePrefix: "vector stores", + }); + + // Auto-load all pages for infinite scroll behavior (like Responses) + React.useEffect(() => { + if (status === "idle" && hasMore) { + loadMore(); + } + }, [status, hasMore, loadMore]); + + if (status === "loading") { + return ( +
+ + + +
+ ); + } + + if (status === "error") { + return
Error: {error?.message}
; + } + + if (!stores || stores.length === 0) { + return

No vector stores found.

; + } + + return ( +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + + + + {stores.map((store) => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + router.push(`/logs/vector-stores/${store.id}`)} + className="cursor-pointer hover:bg-muted/50" + > + {store.id} + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + + ); + })} + +
+
+ ); +} diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/llama_stack/ui/components/layout/app-sidebar.tsx index 1c53d6cc5..532e43dbd 100644 --- a/llama_stack/ui/components/layout/app-sidebar.tsx +++ b/llama_stack/ui/components/layout/app-sidebar.tsx @@ -1,6 +1,11 @@ "use client"; -import { MessageSquareText, MessagesSquare, MoveUpRight } from "lucide-react"; +import { + MessageSquareText, + MessagesSquare, + MoveUpRight, + Database, +} from "lucide-react"; import Link from "next/link"; import { usePathname } from "next/navigation"; import { cn } from "@/lib/utils"; @@ -28,6 +33,11 @@ const logItems = [ url: "/logs/responses", icon: MessagesSquare, }, + { + title: "Vector Stores", + url: "/logs/vector-stores", + icon: Database, + }, { title: "Documentation", url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html", @@ -57,13 +67,13 @@ export function AppSidebar() { className={cn( "justify-start", isActive && - "bg-gray-200 hover:bg-gray-200 text-primary hover:text-primary", + "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100", )} > diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/llama_stack/ui/components/layout/detail-layout.tsx index 58b912703..3013195a2 100644 --- a/llama_stack/ui/components/layout/detail-layout.tsx +++ b/llama_stack/ui/components/layout/detail-layout.tsx @@ -93,7 +93,9 @@ export function PropertyItem({ > {label}:{" "} {typeof value === "string" || typeof value === "number" ? ( - {value} + + {value} + ) : ( value )} @@ -112,7 +114,9 @@ export function PropertiesCard({ children }: PropertiesCardProps) { Properties -
    {children}
+
    + {children} +
); diff --git a/llama_stack/ui/components/ui/message-components.tsx b/llama_stack/ui/components/ui/message-components.tsx index 50ccd623e..39cb570b7 100644 --- a/llama_stack/ui/components/ui/message-components.tsx +++ b/llama_stack/ui/components/ui/message-components.tsx @@ -17,10 +17,10 @@ export const MessageBlock: React.FC = ({ }) => { return (
-

+

{label} {labelDetail && ( - + {labelDetail} )} diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx new file mode 100644 index 000000000..7c5c91dd3 --- /dev/null +++ b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx @@ -0,0 +1,128 @@ +"use client"; + +import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; +import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Skeleton } from "@/components/ui/skeleton"; +import { + DetailLoadingView, + DetailErrorView, + DetailNotFoundView, + DetailLayout, + PropertiesCard, + PropertyItem, +} from "@/components/layout/detail-layout"; +import { + Table, + TableBody, + TableCaption, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; + +interface VectorStoreDetailViewProps { + store: VectorStore | null; + files: VectorStoreFile[]; + isLoadingStore: boolean; + isLoadingFiles: boolean; + errorStore: Error | null; + errorFiles: Error | null; + id: string; +} + +export function VectorStoreDetailView({ + store, + files, + isLoadingStore, + isLoadingFiles, + errorStore, + errorFiles, + id, +}: VectorStoreDetailViewProps) { + const title = "Vector Store Details"; + + if (errorStore) { + return ; + } + if (isLoadingStore) { + return ; + } + if (!store) { + return ; + } + + const mainContent = ( + <> + + + Files + + + {isLoadingFiles ? ( + + ) : errorFiles ? ( +

+ Error loading files: {errorFiles.message} +
+ ) : files.length > 0 ? ( + + Files in this vector store + + + ID + Status + Created + Usage Bytes + + + + {files.map((file) => ( + + {file.id} + {file.status} + + {new Date(file.created_at * 1000).toLocaleString()} + + {file.usage_bytes} + + ))} + +
+ ) : ( +

+ No files in this vector store. +

+ )} + + + + ); + + const sidebar = ( + + + + + + + + + + + ); + + return ( + + ); +} diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index 8fd5fb56c..158569241 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -15,7 +15,7 @@ "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "llama-stack-client": "0.2.13", + "llama-stack-client": "^0.2.14", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11", @@ -676,406 +676,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz", - "integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==", - "cpu": [ - "ppc64" - ], - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz", - "integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==", - "cpu": [ - "arm" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz", - "integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz", - "integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz", - "integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz", - "integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz", - "integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz", - "integrity": 
"sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz", - "integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==", - "cpu": [ - "arm" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz", - "integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz", - "integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==", - "cpu": [ - "ia32" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz", - "integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==", - "cpu": [ - "loong64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz", - "integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==", - "cpu": [ - "mips64el" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz", - "integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==", - "cpu": [ - "ppc64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz", - "integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==", - "cpu": [ - "riscv64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz", - "integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==", - "cpu": [ - "s390x" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.25.5", - "resolved": 
"https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz", - "integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz", - "integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz", - "integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz", - "integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz", - "integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz", - "integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz", - "integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz", - "integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==", - "cpu": [ - "ia32" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz", - "integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, "node_modules/@eslint-community/eslint-utils": { 
"version": "4.7.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", @@ -5999,46 +5599,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/esbuild": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz", - "integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.5", - "@esbuild/android-arm": "0.25.5", - "@esbuild/android-arm64": "0.25.5", - "@esbuild/android-x64": "0.25.5", - "@esbuild/darwin-arm64": "0.25.5", - "@esbuild/darwin-x64": "0.25.5", - "@esbuild/freebsd-arm64": "0.25.5", - "@esbuild/freebsd-x64": "0.25.5", - "@esbuild/linux-arm": "0.25.5", - "@esbuild/linux-arm64": "0.25.5", - "@esbuild/linux-ia32": "0.25.5", - "@esbuild/linux-loong64": "0.25.5", - "@esbuild/linux-mips64el": "0.25.5", - "@esbuild/linux-ppc64": "0.25.5", - "@esbuild/linux-riscv64": "0.25.5", - "@esbuild/linux-s390x": "0.25.5", - "@esbuild/linux-x64": "0.25.5", - "@esbuild/netbsd-arm64": "0.25.5", - "@esbuild/netbsd-x64": "0.25.5", - "@esbuild/openbsd-arm64": "0.25.5", - "@esbuild/openbsd-x64": "0.25.5", - "@esbuild/sunos-x64": "0.25.5", - "@esbuild/win32-arm64": "0.25.5", - "@esbuild/win32-ia32": "0.25.5", - "@esbuild/win32-x64": "0.25.5" - } - }, "node_modules/escalade": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", @@ -6993,6 +6553,7 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, @@ -7154,6 +6715,7 @@ "version": "4.10.0", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz", "integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==", + "dev": true, "license": "MIT", "dependencies": { "resolve-pkg-maps": "^1.0.0" @@ -9537,9 +9099,10 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.13.tgz", - "integrity": "sha512-R1rTFLwgUimr+KjEUkzUvFL6vLASwS9qj3UDSVkJ5BmrKAs5GwVAMeL7yZaTBXGuPUVh124WSlC4d9H0FjWqLA==", + "version": "0.2.14", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.14.tgz", + "integrity": "sha512-bVU3JHp+EPEKR0Vb9vcd9ZyQj/72jSDuptKLwOXET9WrkphIQ8xuW5ueecMTgq8UEls3lwB3HiZM2cDOR9eDsQ==", + "license": "Apache-2.0", "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", @@ -9547,8 +9110,7 @@ "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7", - "tsx": "^4.19.2" + "node-fetch": "^2.6.7" } }, "node_modules/llama-stack-client/node_modules/@types/node": { @@ -11148,6 +10710,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, "license": "MIT", "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" @@ -12198,25 +11761,6 @@ "integrity": 
"sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, - "node_modules/tsx": { - "version": "4.19.4", - "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz", - "integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==", - "license": "MIT", - "dependencies": { - "esbuild": "~0.25.0", - "get-tsconfig": "^4.7.5" - }, - "bin": { - "tsx": "dist/cli.mjs" - }, - "engines": { - "node": ">=18.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - } - }, "node_modules/tw-animate-css": { "version": "1.2.9", "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz", diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 9524ce0a5..b38efe309 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -20,7 +20,7 @@ "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "llama-stack-client": "0.2.13", + "llama-stack-client": "^0.2.14", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11",