From 6ca2a67a9f1bfec5c4e520b6d82407d4d8ecd914 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 12 Nov 2025 04:09:14 -0500 Subject: [PATCH 1/9] chore: remove dead code (#4125) # What does this PR do? build_image is not used because `llama stack build` is gone. Remove it. Signed-off-by: Charlie Doern --- src/llama_stack/core/build.py | 65 ----------------------------------- 1 file changed, 65 deletions(-) diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 2ceb9e9be..fb3a22109 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import importlib.resources import sys from pydantic import BaseModel @@ -12,9 +11,6 @@ from termcolor import cprint from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.utils.exec import run_command -from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api @@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig): for special_dep in special_deps: cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) print() - - -def build_image( - build_config: BuildConfig, - image_name: str, - distro_or_config: str, - run_config: str | None = None, -): - container_base = build_config.distribution_spec.container_image or "python:3.12-slim" - - normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config) - normal_deps += SERVER_DEPENDENCIES - if build_config.external_apis_dir: - external_apis = load_external_apis(build_config) - if external_apis: - for _, api_spec in external_apis.items(): - normal_deps.extend(api_spec.pip_packages) - - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - script = str(importlib.resources.files("llama_stack") / "core/build_container.sh") - args = [ - script, - "--distro-or-config", - distro_or_config, - "--image-name", - image_name, - "--container-base", - container_base, - "--normal-deps", - " ".join(normal_deps), - ] - # When building from a config file (not a template), include the run config path in the - # build arguments - if run_config is not None: - args.extend(["--run-config", run_config]) - else: - script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh") - args = [ - script, - "--env-name", - str(image_name), - "--normal-deps", - " ".join(normal_deps), - ] - - # Always pass both arguments, even if empty, to maintain consistent positional arguments - if special_deps: - args.extend(["--optional-deps", "#".join(special_deps)]) - if external_provider_deps: - args.extend( - ["--external-provider-deps", "#".join(external_provider_deps)] - ) # the script will install external provider module, get its deps, and install those too. 
- - return_code = run_command(args) - - if return_code != 0: - log.error( - f"Failed to build target {image_name} with return code {return_code}", - ) - - return return_code From 539b9c08f38269a80aa5f79cc348b5a2a6032ba3 Mon Sep 17 00:00:00 2001 From: Akshay Ghodake Date: Wed, 12 Nov 2025 14:54:19 +0530 Subject: [PATCH 2/9] chore(deps): update pypdf to fix DoS vulnerabilities (#4121) Update pypdf dependency to address vulnerabilities causing potential denial of service through infinite loops or excessive memory usage when handling malicious PDFs. The update remains fully backward compatible, with no changes to the PdfReader API. # What does this PR do? Fixes #4120 ## Test Plan Co-authored-by: Francisco Arceo --- pyproject.toml | 4 ++-- uv.lock | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 653c6d613..e6808af8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ unit = [ "aiosqlite", "aiohttp", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "chardet", "sqlalchemy", @@ -135,7 +135,7 @@ test = [ "torchvision>=0.21.0", "chardet", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "datasets>=4.0.0", "autoevals", diff --git a/uv.lock b/uv.lock index ba9a862a3..f1808f005 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -2166,7 +2166,7 @@ test = [ { name = "milvus-lite", specifier = ">=2.5.0" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pymilvus", specifier = ">=2.6.1" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "qdrant-client" }, { name = "requests" }, { name = "sqlalchemy" }, @@ -2219,7 +2219,7 @@ unit = [ { name = "moto", extras = ["s3"], specifier = ">=5.1.10" }, { name = "ollama" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "sqlite-vec" }, @@ -3973,11 +3973,11 @@ wheels = [ [[package]] name = "pypdf" -version = "5.9.0" +version = "6.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/3a/584b97a228950ed85aec97c811c68473d9b8d149e6a8c155668287cf1a28/pypdf-5.9.0.tar.gz", hash = "sha256:30f67a614d558e495e1fbb157ba58c1de91ffc1718f5e0dfeb82a029233890a1", size = 5035118, upload-time = "2025-07-27T14:04:52.364Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/d9/6cff57c80a6963e7dd183bf09e9f21604a77716644b1e580e97b259f7612/pypdf-5.9.0-py3-none-any.whl", hash = "sha256:be10a4c54202f46d9daceaa8788be07aa8cd5ea8c25c529c50dd509206382c35", size = 313193, upload-time = "2025-07-27T14:04:50.53Z" }, + { url = "https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = 
"sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" }, ] [[package]] From 63137f9af1fde09eee62a0b28798297a9166c42e Mon Sep 17 00:00:00 2001 From: Sam El-Borai Date: Wed, 12 Nov 2025 17:39:21 +0100 Subject: [PATCH 3/9] chore(stainless): add config for file header (#4126) # What does this PR do? This PR adds Stainless config to specify the Meta copyright file header for generated files. Doing it via config instead of custom code will reduce the probability of git conflict. ## Test Plan - review preview builds --- client-sdks/stainless/config.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml index ab9342c49..c61b53654 100644 --- a/client-sdks/stainless/config.yml +++ b/client-sdks/stainless/config.yml @@ -463,6 +463,12 @@ resources: settings: license: MIT unwrap_response_fields: [data] + file_header: | + Copyright (c) Meta Platforms, Inc. and affiliates. + All rights reserved. + + This source code is licensed under the terms described in the LICENSE file in + the root directory of this source tree. openapi: transformations: From 37853ca5581a832ef7db9a130b2064ae705bcce3 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Wed, 12 Nov 2025 12:17:13 -0500 Subject: [PATCH 4/9] fix(tests): add OpenAI client connection cleanup to prevent CI hangs (#4119) # What does this PR do? Add explicit connection cleanup and shorter timeouts to OpenAI client fixtures. Fixes CI deadlock after 25+ tests due to connection pool exhaustion. Also adds 60s timeout to test_conversation_context_loading as safety net. ## Test Plan tests pass Signed-off-by: Charlie Doern --- tests/integration/fixtures/common.py | 8 +++++++- tests/integration/responses/fixtures/fixtures.py | 10 +++++++++- .../responses/test_conversation_responses.py | 8 +++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index d5e4c15f7..407564c15 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -323,7 +323,13 @@ def require_server(llama_stack_client): @pytest.fixture(scope="session") def openai_client(llama_stack_client, require_server): base_url = f"{llama_stack_client.base_url}/v1" - return OpenAI(base_url=base_url, api_key="fake") + client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0) + yield client + # Cleanup: close HTTP connections + try: + client.close() + except Exception: + pass @pytest.fixture(params=["openai_client", "client_with_models"]) diff --git a/tests/integration/responses/fixtures/fixtures.py b/tests/integration/responses/fixtures/fixtures.py index dbf67e138..b06117b98 100644 --- a/tests/integration/responses/fixtures/fixtures.py +++ b/tests/integration/responses/fixtures/fixtures.py @@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider): client = LlamaStackAsLibraryClient(config, skip_logger_removal=True) return client - return OpenAI( + client = OpenAI( base_url=base_url, api_key=api_key, + max_retries=0, + timeout=30.0, ) + yield client + # Cleanup: close HTTP connections + try: + client.close() + except Exception: + pass diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py index ef7ea7c4e..babb77793 100644 --- a/tests/integration/responses/test_conversation_responses.py +++ 
b/tests/integration/responses/test_conversation_responses.py @@ -65,8 +65,14 @@ class TestConversationResponses: conversation_items = openai_client.conversations.items.list(conversation.id) assert len(conversation_items.data) >= 4 # 2 user + 2 assistant messages + @pytest.mark.timeout(60, method="thread") def test_conversation_context_loading(self, openai_client, text_model_id): - """Test that conversation context is properly loaded for responses.""" + """Test that conversation context is properly loaded for responses. + + Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx + after running 25+ tests. Hangs before first HTTP request is made. Works fine locally. + Investigation needed: connection pool exhaustion or event loop state issue. + """ conversation = openai_client.conversations.create( items=[ {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."}, From eb3f9ac2781d0079eb65ea14b77296fcd3d317d4 Mon Sep 17 00:00:00 2001 From: Francisco Arceo Date: Wed, 12 Nov 2025 12:59:48 -0500 Subject: [PATCH 5/9] feat: allow returning embeddings and metadata from `/vector_stores/` methods; disallow changing Provider ID (#4046) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - Updates `/vector_stores/{vector_store_id}/files/{file_id}/content` to allow returning `embeddings` and `metadata` using the `extra_query` - Updates the UI accordingly to display them. - Update UI to support CRUD operations in the Vector Stores section and adds a new modal exposing the functionality. - Updates Vector Store update to fail if a user tries to update Provider ID (which doesn't make sense to allow) ```python In [1]: client.vector_stores.files.content( vector_store_id=vector_store.id, file_id=file.id, extra_query={"include_embeddings": True, "include_metadata": True} ) Out [1]: FileContentResponse(attributes={}, content=[Content(text='This is a test document to check if embeddings are generated properly.\n', type='text', embedding=[0.33760684728622437, ...,], chunk_metadata={'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'source': None, 'created_timestamp': 1762053437, 'updated_timestamp': 1762053437, 'chunk_window': '0-13', 'chunk_tokenizer': 'DEFAULT_TIKTOKEN_TOKENIZER', 'chunk_embedding_model': 'sentence-transformers/nomic -ai/nomic-embed-text-v1.5', 'chunk_embedding_dimension': 768, 'content_token_count': 13, 'metadata_token_count': 9}, metadata={'filename': 'test-embedding.txt', 'chunk_id': '62a63ae0-c202-f060-1b86-0a688995b8d3', 'document_id': 'file-27291dbc679642ac94ffac6d2810c339', 'token_count': 13, 'metadata_token_count': 9})], file_id='file-27291dbc679642ac94ffac6d2810c339', filename='test-embedding.txt') ``` Screenshots of UI are displayed below: ### List Vector Store with Added "Create New Vector Store" Screenshot 2025-11-06 at 10 47
25 PM

### Create New Vector Store

Screenshot 2025-11-06 at 10 47 49 PM

### Edit Vector Store

Screenshot 2025-11-06 at 10 48 32 PM

### Vector Store Files Contents page (with Embeddings)

Screenshot 2025-11-06 at 11 54 32 PM

### Vector Store Files Contents Details page (with Embeddings)

Screenshot 2025-11-06 at 11 55
00 PM

## Test Plan

Tests added for Middleware extension and Provider failures.

---------

Signed-off-by: Francisco Javier Arceo
---
 client-sdks/stainless/openapi.yml             |  40 +-
 docs/static/llama-stack-spec.yaml             |  40 +-
 docs/static/stainless-llama-stack-spec.yaml   |  40 +-
 src/llama_stack/apis/vector_io/vector_io.py   |  46 ++-
 src/llama_stack/core/library_client.py        |   6 +
 src/llama_stack/core/routers/vector_io.py     |  20 +-
 .../core/routing_tables/vector_stores.py      |   5 +
 .../utils/memory/openai_vector_store_mixin.py |  64 +--
 .../app/logs/vector-stores/page.tsx           | 386 +++++++++++++++---
 .../components/prompts/prompt-editor.test.tsx |   2 +-
 .../vector-store-detail.test.tsx              |  14 +
 .../vector-stores/vector-store-detail.tsx     | 183 ++++++++-
 .../vector-stores/vector-store-editor.tsx     | 235 +++++++++++
 src/llama_stack_ui/lib/contents-api.ts        |  40 +-
 .../vector_io/test_openai_vector_stores.py    |  95 +++++
 tests/unit/core/routers/test_vector_io.py     |  62 +++
 tests/unit/server/test_sse.py                 |   8 +-
 17 files changed, 1161 insertions(+), 125 deletions(-)
 create mode 100644 src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx

diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 9f3ef15b5..1be4af6c9 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -2691,7 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
content: application/json: schema: @@ -2723,6 +2724,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -9375,6 +9390,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. + bool: + type: boolean VectorStoreContent: type: object properties: @@ -9386,6 +9403,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -9409,6 +9446,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f3ef15b5..1be4af6c9 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2691,7 +2691,8 @@ paths: responses: '200': description: >- - A VectorStoreFileContentResponse representing the file contents. + File contents, optionally with embeddings and metadata based on query + parameters. content: application/json: schema: @@ -2726,6 +2727,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -10091,6 +10106,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. 
+ bool: + type: boolean VectorStoreContent: type: object properties: @@ -10102,6 +10119,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -10125,6 +10162,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 846c6f191..699241128 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -10,7 +10,7 @@ # the root directory of this source tree. from typing import Annotated, Any, Literal, Protocol, runtime_checkable -from fastapi import Body +from fastapi import Body, Query from pydantic import BaseModel, Field from llama_stack.apis.common.tracing import telemetry_traceable @@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel): :param type: Content type, currently only "text" is supported :param text: The actual text content + :param embedding: Optional embedding vector for this content chunk + :param chunk_metadata: Optional chunk metadata + :param metadata: Optional user-defined metadata """ type: Literal["text"] text: str + embedding: list[float] | None = None + chunk_metadata: ChunkMetadata | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel): deleted: bool = True +@json_schema_type +class VectorStoreFileContentResponse(BaseModel): + """Represents the parsed content of a vector store file. + + :param object: The object type, which is always `vector_store.file_content.page` + :param data: Parsed content of the file + :param has_more: Indicates if there are more content pages to fetch + :param next_page: The token for the next page, if any + """ + + object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" + data: list[VectorStoreContent] + has_more: bool = False + next_page: str | None = None + + @json_schema_type class VectorStoreChunkingStrategyAuto(BaseModel): """Automatic chunking strategy for vector store files. @@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel): has_more: bool = False -@json_schema_type -class VectorStoreFileContentResponse(BaseModel): - """Represents the parsed content of a vector store file. - - :param object: The object type, which is always `vector_store.file_content.page` - :param data: Parsed content of the file - :param has_more: Indicates if there are more content pages to fetch - :param next_page: The token for the next page, if any - """ - - object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" - data: list[VectorStoreContent] - has_more: bool - next_page: str | None = None - - @json_schema_type class VectorStoreFileDeleteResponse(BaseModel): """Response from deleting a vector store file. 
@@ -732,12 +738,16 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, + include_embeddings: Annotated[bool | None, Query(default=False)] = False, + include_metadata: Annotated[bool | None, Query(default=False)] = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. :param vector_store_id: The ID of the vector store containing the file to retrieve. :param file_id: The ID of the file to retrieve. - :returns: A VectorStoreFileContentResponse representing the file contents. + :param include_embeddings: Whether to include embedding vectors in the response. + :param include_metadata: Whether to include chunk metadata in the response. + :returns: File contents, optionally with embeddings and metadata based on query parameters. """ ... diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index b8f9f715f..db990368b 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls) body |= path_params + # Pass through params that aren't already handled as path params + if options.params: + extra_query_params = {k: v for k, v in options.params.items() if k not in path_params} + if extra_query_params: + body["extra_query"] = extra_query_params + body, field_names = self._handle_file_uploads(options, body) body = self._convert_body(matched_func, body, exclude_params=set(field_names)) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 9dac461db..ed5fb8253 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -247,6 +247,13 @@ class VectorIORouter(VectorIO): metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") + + # Check if provider_id is being changed (not supported) + if metadata and "provider_id" in metadata: + current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id) + if current_store and current_store.provider_id != metadata["provider_id"]: + raise ValueError("provider_id cannot be changed after vector store creation") + provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -338,12 +345,19 @@ class VectorIORouter(VectorIO): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_contents( + logger.debug( + f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, " + f"include_embeddings={include_embeddings}, include_metadata={include_metadata}" + ) + + return await self.routing_table.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/core/routing_tables/vector_stores.py 
b/src/llama_stack/core/routing_tables/vector_stores.py index f95a4dbe3..e77739abe 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 86e6ea013..853245598 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC): # Unknown filter type, default to no match raise ValueError(f"Unsupported filter type: {filter_type}") - def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]: - # content is InterleavedContent + def _chunk_to_vector_store_content( + self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False + ) -> list[VectorStoreContent]: + def extract_fields() -> dict: + """Extract embedding and metadata fields from chunk based on include flags.""" + return { + "embedding": chunk.embedding if include_embeddings else None, + "chunk_metadata": chunk.chunk_metadata if include_metadata else None, + "metadata": chunk.metadata if include_metadata else None, + } + + fields = extract_fields() + if isinstance(chunk.content, str): - content = [ - VectorStoreContent( - type="text", - text=chunk.content, - ) - ] + content_item = VectorStoreContent(type="text", text=chunk.content, **fields) + content = [content_item] elif isinstance(chunk.content, list): # TODO: Add support for other types of content - content = [ - VectorStoreContent( - type="text", - text=item.text, - ) - for item in chunk.content - if item.type == "text" - ] + content = [] + for item in chunk.content: + if item.type == "text": + content_item = VectorStoreContent(type="text", text=item.text, **fields) + content.append(content_item) else: if chunk.content.type != "text": raise ValueError(f"Unsupported content type: {chunk.content.type}") - content = [ - VectorStoreContent( - type="text", - text=chunk.content.text, - ) - ] + + content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields) + content = [content_item] return content async def openai_attach_file_to_vector_store( @@ -820,13 +821,12 @@ class OpenAIVectorStoreMixin(ABC): message=str(e), ) - # Create OpenAI vector store file metadata + # Save vector store file to persistent storage AFTER insert_chunks + # so that chunks include the embeddings that were generated file_info = vector_store_file_object.model_dump(exclude={"last_error"}) file_info["filename"] = file_response.filename if file_response else "" - # Save vector store file to persistent storage (provider-specific) dict_chunks = [c.model_dump() for c in chunks] - # This should be updated to include chunk_id await self._save_openai_vector_store_file(vector_store_id, file_id, 
file_info, dict_chunks) # Update file_ids and file_counts in vector store metadata @@ -921,21 +921,27 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) + # Parameters are already provided directly + # include_embeddings and include_metadata are now function parameters + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: - content.extend(self._chunk_to_vector_store_content(chunk)) + content.extend( + self._chunk_to_vector_store_content( + chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False + ) + ) return VectorStoreFileContentResponse( - object="vector_store.file_content.page", data=content, - has_more=False, - next_page=None, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack_ui/app/logs/vector-stores/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/page.tsx index 72196d496..84680e01a 100644 --- a/src/llama_stack_ui/app/logs/vector-stores/page.tsx +++ b/src/llama_stack_ui/app/logs/vector-stores/page.tsx @@ -8,6 +8,9 @@ import type { import { useRouter } from "next/navigation"; import { usePagination } from "@/hooks/use-pagination"; import { Button } from "@/components/ui/button"; +import { Plus, Trash2, Search, Edit, X } from "lucide-react"; +import { useState } from "react"; +import { Input } from "@/components/ui/input"; import { Table, TableBody, @@ -17,9 +20,21 @@ import { TableRow, } from "@/components/ui/table"; import { Skeleton } from "@/components/ui/skeleton"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { + VectorStoreEditor, + VectorStoreFormData, +} from "@/components/vector-stores/vector-store-editor"; export default function VectorStoresPage() { const router = useRouter(); + const client = useAuthClient(); + const [deletingStores, setDeletingStores] = useState>(new Set()); + const [searchTerm, setSearchTerm] = useState(""); + const [showVectorStoreModal, setShowVectorStoreModal] = useState(false); + const [editingStore, setEditingStore] = useState(null); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); const { data: stores, status, @@ -47,6 +62,142 @@ export default function VectorStoresPage() { } }, [status, hasMore, loadMore]); + // Handle ESC key to close modal + React.useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showVectorStoreModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showVectorStoreModal]); + + const handleDeleteVectorStore = async (storeId: string) => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setDeletingStores(prev => new Set([...prev, storeId])); + + try { + await client.vectorStores.delete(storeId); + // Reload the data to reflect the deletion + window.location.reload(); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? 
err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setDeletingStores(prev => { + const newSet = new Set(prev); + newSet.delete(storeId); + return newSet; + }); + } + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + if (editingStore) { + // Update existing vector store + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== editingStore.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(editingStore.id, updateParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + return; + } + + const createParams: { + name?: string; + provider_id?: string; + extra_body?: Record; + } = { + name: formData.name || undefined, + }; + + // Extract provider_id to top-level (like Python client does) + if (formData.provider_id) { + createParams.provider_id = formData.provider_id; + } + + // Add remaining parameters to extra_body + const extraBody: Record = {}; + if (formData.provider_id) { + extraBody.provider_id = formData.provider_id; + } + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + createParams.extra_body = extraBody; + } + + await client.vectorStores.create(createParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store created successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to create vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to create vector store"; + setModalError(errorMessage); + } + }; + + const handleEditVectorStore = (store: VectorStore) => { + setEditingStore(store); + setShowVectorStoreModal(true); + setModalError(null); + }; + + const handleCancel = () => { + setShowVectorStoreModal(false); + setEditingStore(null); + setModalError(null); + setShowSuccessState(false); + }; + const renderContent = () => { if (status === "loading") { return ( @@ -66,73 +217,190 @@ export default function VectorStoresPage() { return

No vector stores found.

; } - return ( -
- - - - ID - Name - Created - Completed - Cancelled - Failed - In Progress - Total - Usage Bytes - Provider ID - Provider Vector DB ID - - - - {stores.map(store => { - const fileCounts = store.file_counts; - const metadata = store.metadata || {}; - const providerId = metadata.provider_id ?? ""; - const providerDbId = metadata.provider_vector_db_id ?? ""; + // Filter stores based on search term + const filteredStores = stores.filter(store => { + if (!searchTerm) return true; - return ( - router.push(`/logs/vector-stores/${store.id}`)} - className="cursor-pointer hover:bg-muted/50" - > - - - - {store.name} - - {new Date(store.created_at * 1000).toLocaleString()} - - {fileCounts.completed} - {fileCounts.cancelled} - {fileCounts.failed} - {fileCounts.in_progress} - {fileCounts.total} - {store.usage_bytes} - {providerId} - {providerDbId} - - ); - })} - -
+ const searchLower = searchTerm.toLowerCase(); + return ( + store.id.toLowerCase().includes(searchLower) || + (store.name && store.name.toLowerCase().includes(searchLower)) || + (store.metadata?.provider_id && + String(store.metadata.provider_id) + .toLowerCase() + .includes(searchLower)) || + (store.metadata?.provider_vector_db_id && + String(store.metadata.provider_vector_db_id) + .toLowerCase() + .includes(searchLower)) + ); + }); + + return ( +
+ {/* Search Bar */} +
+ + setSearchTerm(e.target.value)} + className="pl-10" + /> +
+ +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + Actions + + + + {filteredStores.map(store => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + + router.push(`/logs/vector-stores/${store.id}`) + } + className="cursor-pointer hover:bg-muted/50" + > + + + + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + +
+ + +
+
+
+ ); + })} +
+
+
); }; return (
-

Vector Stores

+
+

Vector Stores

+ +
{renderContent()} + + {/* Create Vector Store Modal */} + {showVectorStoreModal && ( +
+
+
+

+ {editingStore ? "Edit Vector Store" : "Create New Vector Store"} +

+ +
+
+ +
+
+
+ )}
); } diff --git a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx index 458a5f942..70e0e4e66 100644 --- a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx +++ b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx @@ -2,7 +2,7 @@ import React from "react"; import { render, screen, fireEvent } from "@testing-library/react"; import "@testing-library/jest-dom"; import { PromptEditor } from "./prompt-editor"; -import type { Prompt, PromptFormData } from "./types"; +import type { Prompt } from "./types"; describe("PromptEditor", () => { const mockOnSave = jest.fn(); diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx index 08f90ac0d..78bec8147 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx @@ -12,6 +12,20 @@ jest.mock("next/navigation", () => ({ }), })); +// Mock NextAuth +jest.mock("next-auth/react", () => ({ + useSession: () => ({ + data: { + accessToken: "mock-access-token", + user: { + id: "mock-user-id", + email: "test@example.com", + }, + }, + status: "authenticated", + }), +})); + describe("VectorStoreDetailView", () => { const defaultProps = { store: null, diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx index d3d0fa249..f5b6281e7 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx @@ -1,16 +1,18 @@ "use client"; import { useRouter } from "next/navigation"; +import { useState, useEffect } from "react"; import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Skeleton } from "@/components/ui/skeleton"; import { Button } from "@/components/ui/button"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { Edit2, Trash2, X } from "lucide-react"; import { DetailLoadingView, DetailErrorView, DetailNotFoundView, - DetailLayout, PropertiesCard, PropertyItem, } from "@/components/layout/detail-layout"; @@ -23,6 +25,7 @@ import { TableHeader, TableRow, } from "@/components/ui/table"; +import { VectorStoreEditor, VectorStoreFormData } from "./vector-store-editor"; interface VectorStoreDetailViewProps { store: VectorStore | null; @@ -43,21 +46,122 @@ export function VectorStoreDetailView({ errorFiles, id, }: VectorStoreDetailViewProps) { - const title = "Vector Store Details"; const router = useRouter(); + const client = useAuthClient(); + const [isDeleting, setIsDeleting] = useState(false); + const [showEditModal, setShowEditModal] = useState(false); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); + + // Handle ESC key to close modal + useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showEditModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showEditModal]); const handleFileClick = (fileId: string) => { 
router.push(`/logs/vector-stores/${id}/files/${fileId}`); }; + const handleEditVectorStore = () => { + setShowEditModal(true); + setModalError(null); + setShowSuccessState(false); + }; + + const handleCancel = () => { + setShowEditModal(false); + setModalError(null); + setShowSuccessState(false); + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + // Update existing vector store (same logic as list page) + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== store?.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(id, updateParams); + + // Show success state + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to update vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to update vector store"; + setModalError(errorMessage); + } + }; + + const handleDeleteVectorStore = async () => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setIsDeleting(true); + + try { + await client.vectorStores.delete(id); + // Redirect to the vector stores list after successful deletion + router.push("/logs/vector-stores"); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setIsDeleting(false); + } + }; + if (errorStore) { - return ; + return ( + + ); } if (isLoadingStore) { - return ; + return ; } if (!store) { - return ; + return ; } const mainContent = ( @@ -138,6 +242,73 @@ export function VectorStoreDetailView({ ); return ( - + <> +
+

Vector Store Details

+
+ + +
+
+
+
{mainContent}
+
{sidebar}
+
+ + {/* Edit Vector Store Modal */} + {showEditModal && ( +
+
+
+

Edit Vector Store

+ +
+
+ +
+
+
+ )} + ); } diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx new file mode 100644 index 000000000..719a2a9fd --- /dev/null +++ b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx @@ -0,0 +1,235 @@ +"use client"; + +import { useState, useEffect } from "react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Card, CardContent } from "@/components/ui/card"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { Model } from "llama-stack-client/resources/models"; + +export interface VectorStoreFormData { + name: string; + embedding_model?: string; + embedding_dimension?: number; + provider_id?: string; +} + +interface VectorStoreEditorProps { + onSave: (formData: VectorStoreFormData) => Promise; + onCancel: () => void; + error?: string | null; + initialData?: VectorStoreFormData; + showSuccessState?: boolean; + isEditing?: boolean; +} + +export function VectorStoreEditor({ + onSave, + onCancel, + error, + initialData, + showSuccessState, + isEditing = false, +}: VectorStoreEditorProps) { + const client = useAuthClient(); + const [formData, setFormData] = useState( + initialData || { + name: "", + embedding_model: "", + embedding_dimension: 768, + provider_id: "", + } + ); + const [loading, setLoading] = useState(false); + const [models, setModels] = useState([]); + const [modelsLoading, setModelsLoading] = useState(true); + const [modelsError, setModelsError] = useState(null); + + const embeddingModels = models.filter( + model => model.custom_metadata?.model_type === "embedding" + ); + + useEffect(() => { + const fetchModels = async () => { + try { + setModelsLoading(true); + setModelsError(null); + const modelList = await client.models.list(); + setModels(modelList); + + // Set default embedding model if available + const embeddingModelsList = modelList.filter(model => { + return model.custom_metadata?.model_type === "embedding"; + }); + if (embeddingModelsList.length > 0 && !formData.embedding_model) { + setFormData(prev => ({ + ...prev, + embedding_model: embeddingModelsList[0].id, + })); + } + } catch (err) { + console.error("Failed to load models:", err); + setModelsError( + err instanceof Error ? err.message : "Failed to load models" + ); + } finally { + setModelsLoading(false); + } + }; + + fetchModels(); + }, [client]); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setLoading(true); + + try { + await onSave(formData); + } finally { + setLoading(false); + } + }; + + return ( + + +
+
+ + setFormData({ ...formData, name: e.target.value })} + placeholder="Enter vector store name" + required + /> +
+ +
+ + {modelsLoading ? ( +
+ Loading models... ({models.length} loaded) +
+ ) : modelsError ? ( +
+ Error: {modelsError} +
+ ) : embeddingModels.length === 0 ? ( +
+ No embedding models available ({models.length} total models) +
+ ) : ( + + )} + {formData.embedding_model && ( +

+ Dimension:{" "} + {embeddingModels.find(m => m.id === formData.embedding_model) + ?.custom_metadata?.embedding_dimension || "Unknown"} +

+ )} +
+ +
+ + + setFormData({ + ...formData, + embedding_dimension: parseInt(e.target.value) || 768, + }) + } + placeholder="768" + /> +
+ +
+ + + setFormData({ ...formData, provider_id: e.target.value }) + } + placeholder="e.g., faiss, chroma, sqlite" + disabled={isEditing} + /> + {isEditing && ( +

+ Provider ID cannot be changed after vector store creation +

+ )} +
+ + {error && ( +
+ {error} +
+ )} + +
+ {showSuccessState ? ( + + ) : ( + <> + + + + )} +
+
+
+
+ ); +} diff --git a/src/llama_stack_ui/lib/contents-api.ts b/src/llama_stack_ui/lib/contents-api.ts index f4920f3db..35456faff 100644 --- a/src/llama_stack_ui/lib/contents-api.ts +++ b/src/llama_stack_ui/lib/contents-api.ts @@ -34,9 +34,35 @@ export class ContentsAPI { async getFileContents( vectorStoreId: string, - fileId: string + fileId: string, + includeEmbeddings: boolean = true, + includeMetadata: boolean = true ): Promise { - return this.client.vectorStores.files.content(vectorStoreId, fileId); + try { + // Use query parameters to pass embeddings and metadata flags (OpenAI-compatible pattern) + const extraQuery: Record = {}; + if (includeEmbeddings) { + extraQuery.include_embeddings = true; + } + if (includeMetadata) { + extraQuery.include_metadata = true; + } + + const result = await this.client.vectorStores.files.content( + vectorStoreId, + fileId, + { + query: { + include_embeddings: includeEmbeddings, + include_metadata: includeMetadata, + }, + } + ); + return result; + } catch (error) { + console.error("ContentsAPI.getFileContents error:", error); + throw error; + } } async getContent( @@ -70,11 +96,15 @@ export class ContentsAPI { order?: string; after?: string; before?: string; + includeEmbeddings?: boolean; + includeMetadata?: boolean; } ): Promise { - const fileContents = await this.client.vectorStores.files.content( + const fileContents = await this.getFileContents( vectorStoreId, - fileId + fileId, + options?.includeEmbeddings ?? true, + options?.includeMetadata ?? true ); const contentItems: VectorStoreContentItem[] = []; @@ -82,7 +112,7 @@ export class ContentsAPI { const rawContent = content as Record; // Extract actual fields from the API response - const embedding = rawContent.embedding || undefined; + const embedding = rawContent.embedding as number[] | undefined; const created_timestamp = rawContent.created_timestamp || rawContent.created_at || diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 20f9d2978..1043d4903 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -11,6 +11,7 @@ import pytest from llama_stack_client import BadRequestError from openai import BadRequestError as OpenAIBadRequestError +from llama_stack.apis.files import ExpiresAfter from llama_stack.apis.vector_io import Chunk from llama_stack.core.library_client import LlamaStackAsLibraryClient from llama_stack.log import get_logger @@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata( assert "metadata_config_store" in store_names assert "consistent_config_store" in store_names + + +@vector_provider_wrapper +def test_openai_vector_store_file_contents_with_extra_query( + compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id +): + """Test that vector store file contents endpoint supports extra_query parameter.""" + skip_if_provider_doesnt_support_openai_vector_stores(client_with_models) + compat_client = compat_client_with_empty_stores + + # Create a vector store + vector_store = compat_client.vector_stores.create( + name="test_extra_query_store", + extra_body={ + "embedding_model": embedding_model_id, + "provider_id": vector_io_provider_id, + }, + ) + + # Create and attach a file + test_content = b"This is test content for extra_query validation." 
+ with BytesIO(test_content) as file_buffer: + file_buffer.name = "test_extra_query.txt" + file = compat_client.files.create( + file=file_buffer, + purpose="assistants", + expires_after=ExpiresAfter(anchor="created_at", seconds=86400), + ) + + file_attach_response = compat_client.vector_stores.files.create( + vector_store_id=vector_store.id, + file_id=file.id, + extra_body={"embedding_model": embedding_model_id}, + ) + assert file_attach_response.status == "completed" + + # Wait for processing + time.sleep(2) + + # Test that extra_query parameter is accepted and processed + content_with_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + extra_query={"include_embeddings": True, "include_metadata": True}, + ) + + # Test without extra_query for comparison + content_without_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + ) + + # Validate that both calls succeed + assert content_with_extra_query is not None + assert content_without_extra_query is not None + assert len(content_with_extra_query.data) > 0 + assert len(content_without_extra_query.data) > 0 + + # Validate that extra_query parameter is processed correctly + # Both should have the embedding/metadata fields available (may be None based on flags) + first_chunk_with_flags = content_with_extra_query.data[0] + first_chunk_without_flags = content_without_extra_query.data[0] + + # The key validation: extra_query fields are present in the response + # Handle both dict and object responses (different clients may return different formats) + def has_field(obj, field): + if isinstance(obj, dict): + return field in obj + else: + return hasattr(obj, field) + + # Validate that all expected fields are present in both responses + expected_fields = ["embedding", "chunk_metadata", "metadata", "text"] + for field in expected_fields: + assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query" + assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query" + + # Validate content is the same + def get_field(obj, field): + if isinstance(obj, dict): + return obj[field] + else: + return getattr(obj, field) + + assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8") + assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8") + + with_flags_embedding = get_field(first_chunk_with_flags, "embedding") + without_flags_embedding = get_field(first_chunk_without_flags, "embedding") + + # Validate that embeddings are included when requested and excluded when not requested + assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True" + assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list" + assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False" diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index dd3246cb3..f9bd84a37 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -55,3 +55,65 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error with pytest.raises(ValueError, match="Multiple vector_io providers available"): await router.openai_create_vector_store(request) + + +async def test_update_vector_store_provider_id_change_fails(): + """Test that updating 
a vector store with a different provider_id fails with clear error.""" + mock_routing_table = Mock() + + # Mock an existing vector store with provider_id "faiss" + mock_existing_store = Mock() + mock_existing_store.provider_id = "inline::faiss" + mock_existing_store.identifier = "vs_123" + + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store) + mock_routing_table.get_provider_impl = AsyncMock( + return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123"))) + ) + + router = VectorIORouter(mock_routing_table) + + # Try to update with different provider_id in metadata - this should fail + with pytest.raises(ValueError, match="provider_id cannot be changed after vector store creation"): + await router.openai_update_vector_store( + vector_store_id="vs_123", + name="updated_name", + metadata={"provider_id": "inline::sqlite"}, # Different provider_id + ) + + # Verify the existing store was looked up to check provider_id + mock_routing_table.get_object_by_identifier.assert_called_once_with("vector_store", "vs_123") + + # Provider should not be called since validation failed + mock_routing_table.get_provider_impl.assert_not_called() + + +async def test_update_vector_store_same_provider_id_succeeds(): + """Test that updating a vector store with the same provider_id succeeds.""" + mock_routing_table = Mock() + + # Mock an existing vector store with provider_id "faiss" + mock_existing_store = Mock() + mock_existing_store.provider_id = "inline::faiss" + mock_existing_store.identifier = "vs_123" + + mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store) + mock_routing_table.get_provider_impl = AsyncMock( + return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123"))) + ) + + router = VectorIORouter(mock_routing_table) + + # Update with same provider_id should succeed + await router.openai_update_vector_store( + vector_store_id="vs_123", + name="updated_name", + metadata={"provider_id": "inline::faiss"}, # Same provider_id + ) + + # Verify the provider update method was called + mock_routing_table.get_provider_impl.assert_called_once_with("vs_123") + provider = await mock_routing_table.get_provider_impl("vs_123") + provider.openai_update_vector_store.assert_called_once_with( + vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"} + ) diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index f36c8c181..0303a6ded 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -104,12 +104,18 @@ async def test_paginated_response_url_setting(): route_handler = create_dynamic_typed_route(mock_api_method, "get", "/test/route") - # Mock minimal request + # Mock minimal request with proper state object request = MagicMock() request.scope = {"user_attributes": {}, "principal": ""} request.headers = {} request.body = AsyncMock(return_value=b"") + # Create a simple state object without auto-generating attributes + class MockState: + pass + + request.state = MockState() + result = await route_handler(request) assert isinstance(result, PaginatedResponse) From 94e977c257f70296c73a913dd4218d9119558632 Mon Sep 17 00:00:00 2001 From: Ken Dreyer Date: Wed, 12 Nov 2025 13:04:56 -0500 Subject: [PATCH 6/9] fix(docs): link to test replay-record docs for discoverability (#4134) Help users find the comprehensive integration testing docs by linking to the record-replay documentation. 
This clarifies that the technical README complements the main docs. --- tests/integration/recordings/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/recordings/README.md b/tests/integration/recordings/README.md index 621a07562..bdf4f532f 100644 --- a/tests/integration/recordings/README.md +++ b/tests/integration/recordings/README.md @@ -2,6 +2,10 @@ This directory contains recorded inference API responses used for deterministic testing without requiring live API access. +For more information, see the +[docs](https://llamastack.github.io/docs/contributing/testing/record-replay). +This README provides more technical information. + ## Structure - `responses/` - JSON files containing request/response pairs for inference operations From 356f37b1bae1e98f23ed8f2dd224973b249827ec Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Wed, 12 Nov 2025 18:13:26 +0000 Subject: [PATCH 7/9] docs: clarify model identification uses provider_model_id not model_id (#4128) Updated documentation to accurately reflect current behavior where models are identified as provider_id/provider_model_id in the system. Changes: o Clarify that model_id is for configuration purposes only o Explain models are accessed as provider_id/provider_model_id o Remove outdated aliasing example that suggested model_id could be used as a custom identifier This corrects the documentation which previously suggested model_id could be used to create friendly aliases, which is not how the code actually works. Signed-off-by: Derek Higgins --- docs/docs/distributions/configuration.mdx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index ff50c406a..46ecfa475 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -221,7 +221,15 @@ models: ``` A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up a list of "already known and available" models. -What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. +What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier. + +**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`. 
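+
+As a minimal sketch (assuming a locally running server and the `llama_stack_client` package; the model name follows the `ollama` example above):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# Address the model as provider_id/provider_model_id, not the bare model_id
+response = client.chat.completions.create(
+    model="ollama/llama3.2",
+    messages=[{"role": "user", "content": "Hello"}],
+)
+```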
+ +Examples: +- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null` + → Access as: `ollama/llama3.2` +- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b` + → Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier) If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: From 492f79ca9b2c4ac5c77346fc91ec8a9811dec342 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 12 Nov 2025 10:35:39 -0800 Subject: [PATCH 8/9] fix: harden storage semantics (#4118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes issues in the storage system by guaranteeing immediate durability for responses and ensuring background writers stay alive. Three related fixes: * Responses to the OpenAI-compatible API now write directly to Postgres/SQLite inside the request instead of detouring through an async queue that might never drain; this restores the expected read-after-write behavior and removes the "response not found" races reported by users. * The access-control shim was stamping owner_principal/access_attributes as SQL NULL, which Postgres interprets as non-public rows; fixing it to use the empty-string/JSON-null pattern means conversations and responses stored without an authenticated user stay queryable (matching SQLite). * The inference-store queue remains for batching, but its worker tasks now start lazily on the live event loop so server startup doesn't cancel them—writes keep flowing even when the stack is launched via llama stack run. Closes #4115 ### Test Plan Added a matrix entry to test our "base" suite against Postgres as the store. --- .../actions/setup-test-environment/action.yml | 26 ++ .github/workflows/integration-tests.yml | 12 +- .../distributions/ci-tests/ci_tests.py | 1 - .../ci-tests/run-with-postgres-store.yaml | 293 ++++++++++++++++++ .../starter-gpu/run-with-postgres-store.yaml | 58 ++-- .../starter/run-with-postgres-store.yaml | 58 ++-- .../distributions/starter/starter.py | 105 ++----- .../utils/inference/inference_store.py | 27 +- .../utils/responses/responses_store.py | 75 +---- .../utils/sqlstore/authorized_sqlstore.py | 34 +- tests/integration/ci_matrix.json | 1 + tests/integration/fixtures/common.py | 17 +- tests/integration/suites.py | 20 ++ 13 files changed, 516 insertions(+), 211 deletions(-) create mode 100644 src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 7b306fef5..1c9d019cc 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -39,6 +39,32 @@ runs: if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }} uses: ./.github/actions/setup-vllm + - name: Start Postgres service + if: ${{ contains(inputs.setup, 'postgres') }} + shell: bash + run: | + sudo docker rm -f postgres-ci || true + sudo docker run -d --name postgres-ci \ + -e POSTGRES_USER=llamastack \ + -e POSTGRES_PASSWORD=llamastack \ + -e POSTGRES_DB=llamastack \ + -p 5432:5432 \ + postgres:16 + + echo "Waiting for Postgres to become ready..." 
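+          # Poll pg_isready up to 30 times, sleeping 2s between attempts, before failing the job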
+ for i in {1..30}; do + if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then + echo "Postgres is ready" + break + fi + if [ "$i" -eq 30 ]; then + echo "Postgres failed to start in time" + sudo docker logs postgres-ci || true + exit 1 + fi + sleep 2 + done + - name: Build Llama Stack shell: bash run: | diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2c797e906..71c7933b4 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -66,12 +66,12 @@ jobs: run-replay-mode-tests: needs: generate-matrix runs-on: ubuntu-latest - name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} + name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} strategy: fail-fast: false matrix: - client-type: [library, docker, server] + client: [library, docker, server] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} @@ -84,6 +84,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup test environment + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/setup-test-environment with: python-version: ${{ matrix.python-version }} @@ -93,11 +94,16 @@ jobs: inference-mode: 'replay' - name: Run tests + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/run-and-record-tests env: OPENAI_API_KEY: dummy with: - stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }} + stack-config: >- + ${{ matrix.config.stack_config + || (matrix.client == 'library' && 'ci-tests') + || (matrix.client == 'server' && 'server:ci-tests') + || 'docker:ci-tests' }} setup: ${{ matrix.config.setup }} inference-mode: 'replay' suite: ${{ matrix.config.suite }} diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index c06b1b98d..ab102f5f3 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -13,6 +13,5 @@ from ..starter.starter import get_distribution_template as get_starter_distribut def get_distribution_template() -> DistributionTemplate: template = get_starter_distribution_template(name="ci-tests") template.description = "CI tests for Llama Stack" - template.run_configs.pop("run-with-postgres-store.yaml", None) return template diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml new file mode 100644 index 000000000..5384b58fe --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -0,0 +1,293 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- batches +- datasetio +- eval +- files +- 
inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: 
${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: 
kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 1920ebd9d..e29ada6f4 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -237,10 +232,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -248,7 +243,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -258,27 +253,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + 
default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 702f95381..437674bf9 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -234,10 +229,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -245,7 +240,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -255,27 +250,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 88cd3a4fe..7b7773289 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,11 +17,6 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - SqlStoreReference, -) from 
llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.datatypes import RemoteProviderSpec @@ -154,10 +149,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::reference"), ], } + files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}") files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + config=files_config, ) embedding_provider = Provider( provider_id="sentence-transformers", @@ -187,7 +183,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: provider_shield_id="${env.CODE_SCANNER_MODEL:=}", ), ] - postgres_config = PostgresSqlStoreConfig.sample_run_config() + postgres_sql_config = PostgresSqlStoreConfig.sample_run_config() + postgres_kv_config = PostgresKVStoreConfig.sample_run_config() default_overrides = { "inference": remote_inference_providers + [embedding_provider], "vector_io": [ @@ -244,6 +241,33 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "files": [files_provider], } + base_run_settings = RunConfigSettings( + provider_overrides=default_overrides, + default_models=[], + default_tool_groups=default_tool_groups, + default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), + safety_config=SafetyConfig( + default_shield_id="llama-guard", + ), + ) + + postgres_run_settings = base_run_settings.model_copy( + update={ + "storage_backends": { + "kv_default": postgres_kv_config, + "sql_default": postgres_sql_config, + } + }, + deep=True, + ) + return DistributionTemplate( name=name, distro_type="self_hosted", @@ -253,71 +277,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: providers=providers, additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides=default_overrides, - default_models=[], - default_tool_groups=default_tool_groups, - default_shields=default_shields, - vector_stores_config=VectorStoresConfig( - default_provider_id="faiss", - default_embedding_model=QualifiedModel( - provider_id="sentence-transformers", - model_id="nomic-ai/nomic-embed-text-v1.5", - ), - ), - safety_config=SafetyConfig( - default_shield_id="llama-guard", - ), - ), - "run-with-postgres-store.yaml": RunConfigSettings( - provider_overrides={ - **default_overrides, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], - "batches": [ - Provider( - provider_id="reference", - provider_type="inline::reference", - config=dict( - kvstore=KVStoreReference( - backend="kv_postgres", - namespace="batches", - ).model_dump(exclude_none=True), - ), - ) - ], - }, - storage_backends={ - "kv_postgres": PostgresKVStoreConfig.sample_run_config(), - "sql_postgres": postgres_config, - }, - storage_stores={ - "metadata": KVStoreReference( - backend="kv_postgres", - namespace="registry", - ).model_dump(exclude_none=True), - "inference": 
InferenceStoreReference( - backend="sql_postgres", - table_name="inference_store", - ).model_dump(exclude_none=True), - "conversations": SqlStoreReference( - backend="sql_postgres", - table_name="openai_conversations", - ).model_dump(exclude_none=True), - "prompts": KVStoreReference( - backend="kv_postgres", - namespace="prompts", - ).model_dump(exclude_none=True), - }, - ), + "run.yaml": base_run_settings, + "run-with-postgres-store.yaml": postgres_run_settings, }, run_config_env_vars={ "LLAMA_STACK_PORT": ( diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 2bf947a8d..a3a28aec0 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -66,14 +66,6 @@ class InferenceStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: if not self._worker_tasks: return @@ -94,10 +86,29 @@ class InferenceStore: if self.enable_write_queue and self._queue is not None: await self._queue.join() + async def _ensure_workers_started(self) -> None: + """Ensure the async write queue workers run on the current loop.""" + if not self.enable_write_queue: + return + + if self._queue is None: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + logger.debug( + f"Inference store write queue created with max size {self._max_write_queue_size} " + f"and {self._num_writers} writers" + ) + + if not self._worker_tasks: + loop = asyncio.get_running_loop() + for _ in range(self._num_writers): + task = loop.create_task(self._worker_loop()) + self._worker_tasks.append(task) + async def store_chat_completion( self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam] ) -> None: if self.enable_write_queue: + await self._ensure_workers_started() if self._queue is None: raise ValueError("Inference store is not initialized") try: diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index 40466d00c..f5024a9ed 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -3,8 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import asyncio -from typing import Any from llama_stack.apis.agents import ( Order, @@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import ( ) from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from ..sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -55,28 +53,12 @@ class ResponsesStore: self.policy = policy self.sql_store = None - self.enable_write_queue = True - - # Async write queue and worker control - self._queue: ( - asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None - ) = None - self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = self.reference.max_write_queue_size - self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" base_store = sqlstore_impl(self.reference) self.sql_store = AuthorizedSqlStore(base_store, self.policy) - # Disable write queue for SQLite since WAL mode handles concurrency - # Keep it enabled for other backends (like Postgres) for performance - backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) - if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE: - self.enable_write_queue = False - logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)") - await self.sql_store.create_table( "openai_responses", { @@ -95,33 +77,12 @@ class ResponsesStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: - if not self._worker_tasks: - return - if self._queue is not None: - await self._queue.join() - for t in self._worker_tasks: - if not t.done(): - t.cancel() - for t in self._worker_tasks: - try: - await t - except asyncio.CancelledError: - pass - self._worker_tasks.clear() + return async def flush(self) -> None: - """Wait for all queued writes to complete. 
Useful for testing.""" - if self.enable_write_queue and self._queue is not None: - await self._queue.join() + """Maintained for compatibility; no-op now that writes are synchronous.""" + return async def store_response_object( self, @@ -129,31 +90,7 @@ class ResponsesStore: input: list[OpenAIResponseInput], messages: list[OpenAIMessageParam], ) -> None: - if self.enable_write_queue: - if self._queue is None: - raise ValueError("Responses store is not initialized") - try: - self._queue.put_nowait((response_object, input, messages)) - except asyncio.QueueFull: - logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") - await self._queue.put((response_object, input, messages)) - else: - await self._write_response_object(response_object, input, messages) - - async def _worker_loop(self) -> None: - assert self._queue is not None - while True: - try: - item = await self._queue.get() - except asyncio.CancelledError: - break - response_object, input, messages = item - try: - await self._write_response_object(response_object, input, messages) - except Exception as e: # noqa: BLE001 - logger.error(f"Error writing response object: {e}") - finally: - self._queue.task_done() + await self._write_response_object(response_object, input, messages) async def _write_response_object( self, diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index 3dfc82677..eb2d9a491 100644 --- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use enhanced["owner_principal"] = current_user.principal enhanced["access_attributes"] = current_user.attributes else: - enhanced["owner_principal"] = None - enhanced["access_attributes"] = None + # IMPORTANT: Use empty string and null value (not None) to match public access filter + # The public access filter in _get_public_access_conditions() expects: + # - owner_principal = '' (empty string) + # - access_attributes = null (JSON null, which serializes to the string 'null') + # Setting them to None (SQL NULL) will cause rows to be filtered out on read. + enhanced["owner_principal"] = "" + enhanced["access_attributes"] = None # Pydantic/JSON will serialize this as JSON null return enhanced @@ -188,8 +193,9 @@ class AuthorizedSqlStore: enhanced_data["owner_principal"] = current_user.principal enhanced_data["access_attributes"] = current_user.attributes else: - enhanced_data["owner_principal"] = None - enhanced_data["access_attributes"] = None + # IMPORTANT: Use empty string for owner_principal to match public access filter + enhanced_data["owner_principal"] = "" + enhanced_data["access_attributes"] = None # Will serialize as JSON null await self.sql_store.update(table, enhanced_data, where) @@ -245,14 +251,24 @@ class AuthorizedSqlStore: raise ValueError(f"Unsupported database type: {self.database_type}") def _get_public_access_conditions(self) -> list[str]: - """Get the SQL conditions for public access.""" - # Public records are records that have no owner_principal or access_attributes + """Get the SQL conditions for public access. 
+
+        Public records are those with:
+        - owner_principal = '' (empty string)
+        - access_attributes is either SQL NULL or JSON null
+
+        Note: Different databases serialize None differently:
+        - SQLite: None → JSON null (text = 'null')
+        - Postgres: None → SQL NULL (IS NULL)
+        """
         conditions = ["owner_principal = ''"]
 
         if self.database_type == StorageBackendType.SQL_POSTGRES.value:
-            # Postgres stores JSON null as 'null'
-            conditions.append("access_attributes::text = 'null'")
+            # Accept both SQL NULL and JSON null for Postgres compatibility
+            # This handles both old rows (SQL NULL) and new rows (JSON null)
+            conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
         elif self.database_type == StorageBackendType.SQL_SQLITE.value:
-            conditions.append("access_attributes = 'null'")
+            # SQLite serializes None as JSON null
+            conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
         else:
             raise ValueError(f"Unsupported database type: {self.database_type}")
         return conditions
diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json
index 858176dff..43678e5c7 100644
--- a/tests/integration/ci_matrix.json
+++ b/tests/integration/ci_matrix.json
@@ -1,6 +1,7 @@
 {
     "default": [
         {"suite": "base", "setup": "ollama"},
+        {"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"},
         {"suite": "vision", "setup": "ollama-vision"},
         {"suite": "responses", "setup": "gpt"},
         {"suite": "base-vllm-subset", "setup": "vllm"}
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 407564c15..817180cfe 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session):
         raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
 
     # Handle server:<config_name> format or server:<config_name>:<port>
+    # Also handles server:<distro>::<runfile> format
     if config.startswith("server:"):
-        parts = config.split(":")
-        config_name = parts[1]
-        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        # Strip the "server:" prefix first
+        config_part = config[7:]  # len("server:") == 7
+
+        # Check for :: (distro::runfile format)
+        if "::" in config_part:
+            config_name = config_part
+            port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+        else:
+            # Single colon format: either <config_name> or <config_name>:<port>
+            parts = config_part.split(":")
+            config_name = parts[0]
+            port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
+
         base_url = f"http://localhost:{port}"
 
         force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index 0cec66afe..7689657b4 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "ollama/nomic-embed-text:v1.5",
         },
     ),
+    "ollama-postgres": Setup(
+        name="ollama-postgres",
+        description="Server-mode tests with Postgres-backed persistence",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "SAFETY_MODEL": "ollama/llama-guard3:1b",
+            "POSTGRES_HOST": "127.0.0.1",
+            "POSTGRES_PORT": "5432",
+            "POSTGRES_DB": "llamastack",
+            "POSTGRES_USER": "llamastack",
+            "POSTGRES_PASSWORD": "llamastack",
+            "LLAMA_STACK_LOGGING": "openai_responses=info",
+        },
+        defaults={
+            "text_model": "ollama/llama3.2:3b-instruct-fp16",
+            "embedding_model": 
"sentence-transformers/nomic-embed-text-v1.5", + "safety_model": "ollama/llama-guard3:1b", + "safety_shield": "llama-guard", + }, + ), "vllm": Setup( name="vllm", description="vLLM provider with a text model", From fcf649b97a8bb99f52097b558acfe2d0285f4ef3 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 12 Nov 2025 12:14:26 -0800 Subject: [PATCH 9/9] feat(storage): share sql/kv instances and add upsert support (#4140) A few changes to the storage layer to ensure we reduce unnecessary contention arising out of our design choices (and letting the database layer do its correct thing): - SQL stores now share a single `SqlAlchemySqlStoreImpl` per backend, and `kvstore_impl` caches instances per `(backend, namespace)`. This avoids spawning multiple SQLite connections for the same file, reducing lock contention and aligning the cache story for all backends. - Added an async upsert API (with SQLite/Postgres dialect inserts) and routed it through `AuthorizedSqlStore`, then switched conversations and responses to call it. Using native `ON CONFLICT DO UPDATE` eliminates the insert-then-update retry window that previously caused long WAL lock retries. ### Test Plan Existing tests, added a unit test for `upsert()` --- .../core/conversations/conversations.py | 15 ++--- .../providers/utils/kvstore/kvstore.py | 57 +++++++++++++------ .../utils/responses/responses_store.py | 19 ++----- .../providers/utils/sqlstore/api.py | 12 ++++ .../utils/sqlstore/authorized_sqlstore.py | 17 ++++++ .../utils/sqlstore/sqlalchemy_sqlstore.py | 39 ++++++++++++- .../providers/utils/sqlstore/sqlstore.py | 30 ++++++++-- tests/unit/utils/sqlstore/test_sqlstore.py | 34 ++++++++++- 8 files changed, 172 insertions(+), 51 deletions(-) diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 951de5e9d..f83834522 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations): "item_data": item_dict, } - # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update - try: - await self.sql_store.insert(table="conversation_items", data=item_record) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_items", - data={"created_at": created_at, "item_data": item_dict}, - where={"id": item_id}, - ) + await self.sql_store.upsert( + table="conversation_items", + data=item_record, + conflict_columns=["id"], + ) created_items.append(item_dict) diff --git a/src/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/providers/utils/kvstore/kvstore.py index eee51e5d9..5b8d77102 100644 --- a/src/llama_stack/providers/utils/kvstore/kvstore.py +++ b/src/llama_stack/providers/utils/kvstore/kvstore.py @@ -11,6 +11,9 @@ from __future__ import annotations +import asyncio +from collections import defaultdict + from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore @@ -53,45 +56,63 @@ class InmemoryKVStoreImpl(KVStore): _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} +_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {} +_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock) def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available KV store backends for reference 
resolution.""" global _KVSTORE_BACKENDS + global _KVSTORE_INSTANCES + global _KVSTORE_LOCKS _KVSTORE_BACKENDS.clear() + _KVSTORE_INSTANCES.clear() + _KVSTORE_LOCKS.clear() for name, cfg in backends.items(): _KVSTORE_BACKENDS[name] = cfg async def kvstore_impl(reference: KVStoreReference) -> KVStore: backend_name = reference.backend + cache_key = (backend_name, reference.namespace) + + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing backend_config = _KVSTORE_BACKENDS.get(backend_name) if backend_config is None: raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}") - config = backend_config.model_copy() - config.namespace = reference.namespace + lock = _KVSTORE_LOCKS[cache_key] + async with lock: + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing - if config.type == StorageBackendType.KV_REDIS.value: - from .redis import RedisKVStoreImpl + config = backend_config.model_copy() + config.namespace = reference.namespace - impl = RedisKVStoreImpl(config) - elif config.type == StorageBackendType.KV_SQLITE.value: - from .sqlite import SqliteKVStoreImpl + if config.type == StorageBackendType.KV_REDIS.value: + from .redis import RedisKVStoreImpl - impl = SqliteKVStoreImpl(config) - elif config.type == StorageBackendType.KV_POSTGRES.value: - from .postgres import PostgresKVStoreImpl + impl = RedisKVStoreImpl(config) + elif config.type == StorageBackendType.KV_SQLITE.value: + from .sqlite import SqliteKVStoreImpl - impl = PostgresKVStoreImpl(config) - elif config.type == StorageBackendType.KV_MONGODB.value: - from .mongodb import MongoDBKVStoreImpl + impl = SqliteKVStoreImpl(config) + elif config.type == StorageBackendType.KV_POSTGRES.value: + from .postgres import PostgresKVStoreImpl - impl = MongoDBKVStoreImpl(config) - else: - raise ValueError(f"Unknown kvstore type {config.type}") + impl = PostgresKVStoreImpl(config) + elif config.type == StorageBackendType.KV_MONGODB.value: + from .mongodb import MongoDBKVStoreImpl - await impl.initialize() - return impl + impl = MongoDBKVStoreImpl(config) + else: + raise ValueError(f"Unknown kvstore type {config.type}") + + await impl.initialize() + _KVSTORE_INSTANCES[cache_key] = impl + return impl diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index f5024a9ed..fdca8ddee 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -252,19 +252,12 @@ class ResponsesStore: # Serialize messages to dict format for JSON storage messages_data = [msg.model_dump() for msg in messages] - # Upsert: try insert first, update if exists - try: - await self.sql_store.insert( - table="conversation_messages", - data={"conversation_id": conversation_id, "messages": messages_data}, - ) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_messages", - data={"messages": messages_data}, - where={"conversation_id": conversation_id}, - ) + await self.sql_store.upsert( + table="conversation_messages", + data={"conversation_id": conversation_id, "messages": messages_data}, + conflict_columns=["conversation_id"], + update_columns=["messages"], + ) logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}") diff --git a/src/llama_stack/providers/utils/sqlstore/api.py 
b/src/llama_stack/providers/utils/sqlstore/api.py index a61fd1090..bcd224234 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -47,6 +47,18 @@ class SqlStore(Protocol): """ pass + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """ + Insert a row and update specified columns when conflicts occur. + """ + pass + async def fetch_all( self, table: str, diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index eb2d9a491..ba95dd120 100644 --- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -129,6 +129,23 @@ class AuthorizedSqlStore: enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data] await self.sql_store.insert(table, enhanced_data) + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """Upsert a row with automatic access control attribute capture.""" + current_user = get_authenticated_user() + enhanced_data = _enhance_item_with_access_control(data, current_user) + await self.sql_store.upsert( + table=table, + data=enhanced_data, + conflict_columns=conflict_columns, + update_columns=update_columns, + ) + async def fetch_all( self, table: str, diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 356f49ed1..cfc3131f4 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -72,13 +72,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement: class SqlAlchemySqlStoreImpl(SqlStore): def __init__(self, config: SqlAlchemySqlStoreConfig): self.config = config + self._is_sqlite_backend = "sqlite" in self.config.engine_str self.async_session = async_sessionmaker(self.create_engine()) self.metadata = MetaData() def create_engine(self) -> AsyncEngine: # Configure connection args for better concurrency support connect_args = {} - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: # SQLite-specific optimizations for concurrent access # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases connect_args["timeout"] = 5.0 @@ -91,7 +92,7 @@ class SqlAlchemySqlStoreImpl(SqlStore): ) # Enable WAL mode for SQLite to support concurrent readers and writers - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: @event.listens_for(engine.sync_engine, "connect") def set_sqlite_pragma(dbapi_conn, connection_record): @@ -151,6 +152,29 @@ class SqlAlchemySqlStoreImpl(SqlStore): await session.execute(self.metadata.tables[table].insert(), data) await session.commit() + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + table_obj = self.metadata.tables[table] + dialect_insert = self._get_dialect_insert(table_obj) + insert_stmt = dialect_insert.values(**data) + + if update_columns is None: + update_columns = [col for col in data.keys() if col not in conflict_columns] + + update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns} + 
conflict_cols = [table_obj.c[col] for col in conflict_columns] + + stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping) + + async with self.async_session() as session: + await session.execute(stmt) + await session.commit() + async def fetch_all( self, table: str, @@ -333,9 +357,18 @@ class SqlAlchemySqlStoreImpl(SqlStore): add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}") await conn.execute(add_column_sql) - except Exception as e: # If any error occurs during migration, log it but don't fail # The table creation will handle adding the column logger.error(f"Error adding column {column_name} to table {table}: {e}") pass + + def _get_dialect_insert(self, table: Table): + if self._is_sqlite_backend: + from sqlalchemy.dialects.sqlite import insert as sqlite_insert + + return sqlite_insert(table) + else: + from sqlalchemy.dialects.postgresql import insert as pg_insert + + return pg_insert(table) diff --git a/src/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlstore.py index 31801c4ca..9409b7d00 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from threading import Lock from typing import Annotated, cast from pydantic import Field @@ -21,6 +22,8 @@ from .api import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} +_SQLSTORE_INSTANCES: dict[str, SqlStore] = {} +_SQLSTORE_LOCKS: dict[str, Lock] = {} SqlStoreConfig = Annotated[ @@ -52,19 +55,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: f"Unknown SQL store backend '{backend_name}'. 
Registered backends: {sorted(_SQLSTORE_BACKENDS)}" ) - if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): - from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing - config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() - return SqlAlchemySqlStoreImpl(config) - else: - raise ValueError(f"Unknown sqlstore type {backend_config.type}") + lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock()) + with lock: + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): + from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + instance = SqlAlchemySqlStoreImpl(config) + _SQLSTORE_INSTANCES[backend_name] = instance + return instance + else: + raise ValueError(f"Unknown sqlstore type {backend_config.type}") def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available SQL store backends for reference resolution.""" global _SQLSTORE_BACKENDS + global _SQLSTORE_INSTANCES _SQLSTORE_BACKENDS.clear() + _SQLSTORE_INSTANCES.clear() + _SQLSTORE_LOCKS.clear() for name, cfg in backends.items(): _SQLSTORE_BACKENDS[name] = cfg diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py index 00669b698..d7ba0dc89 100644 --- a/tests/unit/utils/sqlstore/test_sqlstore.py +++ b/tests/unit/utils/sqlstore/test_sqlstore.py @@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory import pytest -from llama_stack.providers.utils.sqlstore.api import ColumnType +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig @@ -65,6 +65,38 @@ async def test_sqlite_sqlstore(): assert result.has_more is False +async def test_sqlstore_upsert_support(): + with TemporaryDirectory() as tmp_dir: + db_path = tmp_dir + "/upsert.db" + store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path)) + + await store.create_table( + "items", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "value": ColumnType.STRING, + "updated_at": ColumnType.INTEGER, + }, + ) + + await store.upsert( + table="items", + data={"id": "item_1", "value": "first", "updated_at": 1}, + conflict_columns=["id"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "first", "updated_at": 1} + + await store.upsert( + table="items", + data={"id": "item_1", "value": "second", "updated_at": 2}, + conflict_columns=["id"], + update_columns=["value", "updated_at"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "second", "updated_at": 2} + + async def test_sqlstore_pagination_basic(): """Test basic pagination functionality at the SQL store level.""" with TemporaryDirectory() as tmp_dir: