Merge branch 'main' into feat/add-dana-agent-provider-stub

commit 0d038391f1 (55 changed files with 2164 additions and 478 deletions)
@@ -39,6 +39,32 @@ runs:
     if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
     uses: ./.github/actions/setup-vllm

+  - name: Start Postgres service
+    if: ${{ contains(inputs.setup, 'postgres') }}
+    shell: bash
+    run: |
+      sudo docker rm -f postgres-ci || true
+      sudo docker run -d --name postgres-ci \
+        -e POSTGRES_USER=llamastack \
+        -e POSTGRES_PASSWORD=llamastack \
+        -e POSTGRES_DB=llamastack \
+        -p 5432:5432 \
+        postgres:16
+
+      echo "Waiting for Postgres to become ready..."
+      for i in {1..30}; do
+        if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then
+          echo "Postgres is ready"
+          break
+        fi
+        if [ "$i" -eq 30 ]; then
+          echo "Postgres failed to start in time"
+          sudo docker logs postgres-ci || true
+          exit 1
+        fi
+        sleep 2
+      done
+
   - name: Build Llama Stack
     shell: bash
     run: |
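A side note for local debugging: the readiness loop above shells out to `pg_isready` inside the container. The sketch below performs an equivalent check from Python; it assumes the `postgres-ci` container is running with the credentials shown and that `psycopg2-binary` is available (it is pinned in the test extras changed further down), so treat it as an illustration rather than part of the workflow.

```python
# Hedged sketch: the same readiness check as the pg_isready loop, from Python.
import time

import psycopg2


def wait_for_postgres(retries: int = 30, delay: float = 2.0) -> None:
    for attempt in range(1, retries + 1):
        try:
            # Credentials match the postgres-ci container started above.
            conn = psycopg2.connect(
                host="localhost",
                port=5432,
                user="llamastack",
                password="llamastack",
                dbname="llamastack",
            )
            conn.close()
            print("Postgres is ready")
            return
        except psycopg2.OperationalError:
            if attempt == retries:
                raise
            time.sleep(delay)


wait_for_postgres()
```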
.github/workflows/integration-tests.yml (vendored, 12 changes)

@@ -66,12 +66,12 @@ jobs:
   run-replay-mode-tests:
     needs: generate-matrix
     runs-on: ubuntu-latest
-    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
+    name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}

     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker, server]
+        client: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}

@@ -84,6 +84,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Setup test environment
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/setup-test-environment
         with:
           python-version: ${{ matrix.python-version }}

@@ -93,11 +94,16 @@ jobs:
           inference-mode: 'replay'

       - name: Run tests
+        if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
         with:
-          stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
+          stack-config: >-
+            ${{ matrix.config.stack_config
+            || (matrix.client == 'library' && 'ci-tests')
+            || (matrix.client == 'server' && 'server:ci-tests')
+            || 'docker:ci-tests' }}
           setup: ${{ matrix.config.setup }}
           inference-mode: 'replay'
           suite: ${{ matrix.config.suite }}
@@ -463,6 +463,12 @@ resources:
 settings:
   license: MIT
   unwrap_response_fields: [data]
+  file_header: |
+    Copyright (c) Meta Platforms, Inc. and affiliates.
+    All rights reserved.
+
+    This source code is licensed under the terms described in the LICENSE file in
+    the root directory of this source tree.
 openapi:
   transformations:
@@ -2691,7 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
           content:
             application/json:
               schema:

@@ -2726,6 +2727,20 @@ paths:
           required: true
           schema:
             type: string
+        - name: include_embeddings
+          in: query
+          description: >-
+            Whether to include embedding vectors in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
+        - name: include_metadata
+          in: query
+          description: >-
+            Whether to include chunk metadata in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:

@@ -10091,6 +10106,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
        Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:

@@ -10102,6 +10119,26 @@ components:
         text:
           type: string
           description: The actual text content
+        embedding:
+          type: array
+          items:
+            type: number
+          description: >-
+            Optional embedding vector for this content chunk
+        chunk_metadata:
+          $ref: '#/components/schemas/ChunkMetadata'
+          description: Optional chunk metadata
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type

@@ -10125,6 +10162,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
@@ -221,7 +221,15 @@ models:
 ```
 A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.

-What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
+What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier.
+
+**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`.
+
+Examples:
+- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null`
+  → Access as: `ollama/llama3.2`
+- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b`
+  → Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier)

 If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below:
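To make the identifier rule above concrete, here is a minimal sketch of the first example; it assumes a Stack server on the default port and the OpenAI-compatible chat interface exposed by `llama-stack-client`, so the exact call shape is illustrative rather than authoritative.

```python
# Hedged sketch: addressing a model as provider_id/provider_model_id.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Config: model_id: llama3.2, provider_id: ollama, provider_model_id: null,
# so the model is addressed as "ollama/llama3.2" (not as "llama3.2").
response = client.chat.completions.create(
    model="ollama/llama3.2",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response.choices[0].message.content)
```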
@@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack.
 - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions
 - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code
 - **[Configuration Reference](./configuration.mdx)** - Configuration file format details
+- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers
docs/docs/distributions/llama_stack_ui.mdx (new file, 109 lines)

@@ -0,0 +1,109 @@
---
title: Llama Stack UI
description: Web-based user interface for interacting with Llama Stack servers
sidebar_label: Llama Stack UI
sidebar_position: 8
---

# Llama Stack UI

The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs.

## Features

- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity
- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows
- **Prompt Management**: Create and manage reusable prompts

## Prerequisites

You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend.

If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](../getting_started/starting_llama_stack_server.mdx) guide.

## Running the UI

### Option 1: Using npx (Recommended for Quick Start)

The fastest way to get started is using `npx`:

```bash
npx llama-stack-ui
```

This will start the UI server on `http://localhost:8322` (default port).

### Option 2: Using Docker

Run the UI in a container:

```bash
docker run -p 8322:8322 llamastack/ui
```

Access the UI at `http://localhost:8322`.

## Environment Variables

The UI can be configured using the following environment variables:

| Variable | Description | Default |
|----------|-------------|---------|
| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` |
| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` |

If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables:

| Variable | Description | Default |
|----------|-------------|---------|
| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` |
| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - |
| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - |

### Setting Environment Variables

#### For npx:

```bash
LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
LLAMA_STACK_UI_PORT=8080 \
npx llama-stack-ui
```

#### For Docker:

```bash
docker run -p 8080:8080 \
  -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
  -e LLAMA_STACK_UI_PORT=8080 \
  llamastack/ui
```

## Using the UI

### Managing Resources

- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings
- **Prompts**: Create and manage reusable prompt templates
- **Chat Completions**: View history of chat interactions
- **Responses**: Browse detailed agent responses and tool calls

## Development

If you want to run the UI from source for development:

```bash
# From the project root
cd src/llama_stack_ui

# Install dependencies
npm install

# Set environment variables
export LLAMA_STACK_BACKEND_URL=http://localhost:8321

# Start the development server
npm run dev
```

The development server will start on `http://localhost:8322` with hot reloading enabled.
@@ -144,7 +144,7 @@ source .venv/bin/activate
 ```bash
 uv venv client --python 3.12
 source client/bin/activate
-pip install llama-stack-client
+uv pip install llama-stack-client
 ```
 </TabItem>
 </Tabs>
@@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = {
       'distributions/importing_as_library',
       'distributions/configuration',
       'distributions/starting_llama_stack_server',
+      'distributions/llama_stack_ui',
       {
         type: 'category',
         label: 'Self-Hosted Distributions',
docs/static/llama-stack-spec.yaml (vendored, 40 changes)

@@ -2688,7 +2688,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
           content:
             application/json:
               schema:

@@ -2723,6 +2724,20 @@ paths:
           required: true
           schema:
             type: string
+        - name: include_embeddings
+          in: query
+          description: >-
+            Whether to include embedding vectors in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
+        - name: include_metadata
+          in: query
+          description: >-
+            Whether to include chunk metadata in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:

@@ -9375,6 +9390,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
        Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:

@@ -9386,6 +9403,26 @@ components:
         text:
           type: string
           description: The actual text content
+        embedding:
+          type: array
+          items:
+            type: number
+          description: >-
+            Optional embedding vector for this content chunk
+        chunk_metadata:
+          $ref: '#/components/schemas/ChunkMetadata'
+          description: Optional chunk metadata
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type

@@ -9409,6 +9446,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
docs/static/stainless-llama-stack-spec.yaml (vendored, 40 changes)

@@ -2691,7 +2691,8 @@ paths:
       responses:
         '200':
           description: >-
-            A VectorStoreFileContentResponse representing the file contents.
+            File contents, optionally with embeddings and metadata based on query
+            parameters.
           content:
             application/json:
               schema:

@@ -2726,6 +2727,20 @@ paths:
           required: true
           schema:
             type: string
+        - name: include_embeddings
+          in: query
+          description: >-
+            Whether to include embedding vectors in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
+        - name: include_metadata
+          in: query
+          description: >-
+            Whether to include chunk metadata in the response.
+          required: false
+          schema:
+            $ref: '#/components/schemas/bool'
       deprecated: false
   /v1/vector_stores/{vector_store_id}/search:
     post:

@@ -10091,6 +10106,8 @@ components:
       title: VectorStoreFileDeleteResponse
       description: >-
        Response from deleting a vector store file.
+    bool:
+      type: boolean
     VectorStoreContent:
       type: object
       properties:

@@ -10102,6 +10119,26 @@ components:
         text:
           type: string
           description: The actual text content
+        embedding:
+          type: array
+          items:
+            type: number
+          description: >-
+            Optional embedding vector for this content chunk
+        chunk_metadata:
+          $ref: '#/components/schemas/ChunkMetadata'
+          description: Optional chunk metadata
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: Optional user-defined metadata
       additionalProperties: false
       required:
         - type

@@ -10125,6 +10162,7 @@ components:
           description: Parsed content of the file
         has_more:
           type: boolean
+          default: false
           description: >-
             Indicates if there are more content pages to fetch
         next_page:
@@ -112,7 +112,7 @@ unit = [
     "aiosqlite",
     "aiohttp",
     "psycopg2-binary>=2.9.0",
-    "pypdf",
+    "pypdf>=6.1.3",
     "mcp",
     "chardet",
     "sqlalchemy",

@@ -135,7 +135,7 @@ test = [
     "torchvision>=0.21.0",
     "chardet",
     "psycopg2-binary>=2.9.0",
-    "pypdf",
+    "pypdf>=6.1.3",
     "mcp",
     "datasets>=4.0.0",
     "autoevals",
@@ -10,7 +10,7 @@
 # the root directory of this source tree.
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable

-from fastapi import Body
+from fastapi import Body, Query
 from pydantic import BaseModel, Field

 from llama_stack.apis.common.tracing import telemetry_traceable

@@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel):

     :param type: Content type, currently only "text" is supported
     :param text: The actual text content
+    :param embedding: Optional embedding vector for this content chunk
+    :param chunk_metadata: Optional chunk metadata
+    :param metadata: Optional user-defined metadata
     """

     type: Literal["text"]
     text: str
+    embedding: list[float] | None = None
+    chunk_metadata: ChunkMetadata | None = None
+    metadata: dict[str, Any] | None = None


 @json_schema_type

@@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
     deleted: bool = True


+@json_schema_type
+class VectorStoreFileContentResponse(BaseModel):
+    """Represents the parsed content of a vector store file.
+
+    :param object: The object type, which is always `vector_store.file_content.page`
+    :param data: Parsed content of the file
+    :param has_more: Indicates if there are more content pages to fetch
+    :param next_page: The token for the next page, if any
+    """
+
+    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
+    data: list[VectorStoreContent]
+    has_more: bool = False
+    next_page: str | None = None
+
+
 @json_schema_type
 class VectorStoreChunkingStrategyAuto(BaseModel):
     """Automatic chunking strategy for vector store files.

@@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel):
     has_more: bool = False


-@json_schema_type
-class VectorStoreFileContentResponse(BaseModel):
-    """Represents the parsed content of a vector store file.
-
-    :param object: The object type, which is always `vector_store.file_content.page`
-    :param data: Parsed content of the file
-    :param has_more: Indicates if there are more content pages to fetch
-    :param next_page: The token for the next page, if any
-    """
-
-    object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
-    data: list[VectorStoreContent]
-    has_more: bool
-    next_page: str | None = None
-
-
 @json_schema_type
 class VectorStoreFileDeleteResponse(BaseModel):
     """Response from deleting a vector store file.

@@ -732,12 +738,16 @@ class VectorIO(Protocol):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: Annotated[bool | None, Query(default=False)] = False,
+        include_metadata: Annotated[bool | None, Query(default=False)] = False,
     ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file.

         :param vector_store_id: The ID of the vector store containing the file to retrieve.
         :param file_id: The ID of the file to retrieve.
-        :returns: A VectorStoreFileContentResponse representing the file contents.
+        :param include_embeddings: Whether to include embedding vectors in the response.
+        :param include_metadata: Whether to include chunk metadata in the response.
+        :returns: File contents, optionally with embeddings and metadata based on query parameters.
         """
         ...
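A quick way to exercise the new parameters is to hit the route directly. The sketch below assumes a server on the default port, placeholder vector store and file IDs (`vs_123`/`file_456`), and that the file-contents route follows the OpenAI-compatible `/v1/vector_stores/{vector_store_id}/files/{file_id}/content` pattern; none of that is confirmed by the diff itself, so treat it as illustrative.

```python
# Hedged sketch: requesting file contents with the new query parameters.
import httpx

resp = httpx.get(
    "http://localhost:8321/v1/vector_stores/vs_123/files/file_456/content",
    params={"include_embeddings": True, "include_metadata": True},
)
resp.raise_for_status()

page = resp.json()  # shaped like VectorStoreFileContentResponse
for chunk in page["data"]:
    has_embedding = chunk.get("embedding") is not None
    print(chunk["text"][:60], "| embedding present:", has_embedding)
print("has_more:", page["has_more"], "next_page:", page.get("next_page"))
```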
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import importlib.resources
 import sys

 from pydantic import BaseModel

@@ -12,9 +11,6 @@ from termcolor import cprint

 from llama_stack.core.datatypes import BuildConfig
 from llama_stack.core.distribution import get_provider_registry
-from llama_stack.core.external import load_external_apis
-from llama_stack.core.utils.exec import run_command
-from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.distributions.template import DistributionTemplate
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import Api

@@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig):
     for special_dep in special_deps:
         cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
     print()
-
-
-def build_image(
-    build_config: BuildConfig,
-    image_name: str,
-    distro_or_config: str,
-    run_config: str | None = None,
-):
-    container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
-
-    normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
-    normal_deps += SERVER_DEPENDENCIES
-    if build_config.external_apis_dir:
-        external_apis = load_external_apis(build_config)
-        if external_apis:
-            for _, api_spec in external_apis.items():
-                normal_deps.extend(api_spec.pip_packages)
-
-    if build_config.image_type == LlamaStackImageType.CONTAINER.value:
-        script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
-        args = [
-            script,
-            "--distro-or-config",
-            distro_or_config,
-            "--image-name",
-            image_name,
-            "--container-base",
-            container_base,
-            "--normal-deps",
-            " ".join(normal_deps),
-        ]
-        # When building from a config file (not a template), include the run config path in the
-        # build arguments
-        if run_config is not None:
-            args.extend(["--run-config", run_config])
-    else:
-        script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
-        args = [
-            script,
-            "--env-name",
-            str(image_name),
-            "--normal-deps",
-            " ".join(normal_deps),
-        ]
-
-    # Always pass both arguments, even if empty, to maintain consistent positional arguments
-    if special_deps:
-        args.extend(["--optional-deps", "#".join(special_deps)])
-    if external_provider_deps:
-        args.extend(
-            ["--external-provider-deps", "#".join(external_provider_deps)]
-        )  # the script will install external provider module, get its deps, and install those too.
-
-    return_code = run_command(args)
-
-    if return_code != 0:
-        log.error(
-            f"Failed to build target {image_name} with return code {return_code}",
-        )
-
-    return return_code
@@ -203,15 +203,10 @@ class ConversationServiceImpl(Conversations):
             "item_data": item_dict,
         }

-        # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
-        try:
-            await self.sql_store.insert(table="conversation_items", data=item_record)
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_items",
-                data={"created_at": created_at, "item_data": item_dict},
-                where={"id": item_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_items",
+            data=item_record,
+            conflict_columns=["id"],
+        )

         created_items.append(item_dict)
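The replacement assumes `sql_store.upsert` resolves the ID conflict natively instead of the insert-then-update fallback. As a rough illustration of the intended semantics, here is a standalone SQLite sketch (hypothetical column types, not the actual sql_store implementation):

```python
# Minimal sketch of upsert semantics via SQLite's ON CONFLICT clause.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE conversation_items (id TEXT PRIMARY KEY, created_at INTEGER, item_data TEXT)"
)


def upsert(item_id: str, created_at: int, item_data: str) -> None:
    # One statement: insert a new row, or update the existing row on id conflict.
    conn.execute(
        """
        INSERT INTO conversation_items (id, created_at, item_data)
        VALUES (?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
            created_at = excluded.created_at,
            item_data = excluded.item_data
        """,
        (item_id, created_at, item_data),
    )


upsert("item-1", 100, '{"type": "message"}')
upsert("item-1", 200, '{"type": "message", "edited": true}')  # updates in place
print(conn.execute("SELECT * FROM conversation_items").fetchall())
```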
@@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
         body |= path_params

+        # Pass through params that aren't already handled as path params
+        if options.params:
+            extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
+            if extra_query_params:
+                body["extra_query"] = extra_query_params
+
         body, field_names = self._handle_file_uploads(options, body)

         body = self._convert_body(matched_func, body, exclude_params=set(field_names))
@@ -247,6 +247,13 @@ class VectorIORouter(VectorIO):
         metadata: dict[str, Any] | None = None,
     ) -> VectorStoreObject:
         logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
+
+        # Check if provider_id is being changed (not supported)
+        if metadata and "provider_id" in metadata:
+            current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
+            if current_store and current_store.provider_id != metadata["provider_id"]:
+                raise ValueError("provider_id cannot be changed after vector store creation")
+
         provider = await self.routing_table.get_provider_impl(vector_store_id)
         return await provider.openai_update_vector_store(
             vector_store_id=vector_store_id,

@@ -338,12 +345,19 @@ class VectorIORouter(VectorIO):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
-        logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
-        provider = await self.routing_table.get_provider_impl(vector_store_id)
-        return await provider.openai_retrieve_vector_store_file_contents(
+        logger.debug(
+            f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
+            f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
+        )
+
+        return await self.routing_table.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
+            include_embeddings=include_embeddings,
+            include_metadata=include_metadata,
         )

     async def openai_update_vector_store_file(
@@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)

         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_contents(
             vector_store_id=vector_store_id,
             file_id=file_id,
+            include_embeddings=include_embeddings,
+            include_metadata=include_metadata,
         )

     async def openai_update_vector_store_file(
@@ -13,6 +13,5 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
 def get_distribution_template() -> DistributionTemplate:
     template = get_starter_distribution_template(name="ci-tests")
     template.description = "CI tests for Llama Stack"
-    template.run_configs.pop("run-with-postgres-store.yaml", None)

     return template
@@ -0,0 +1,293 @@ (new file)
version: 2
image_name: ci-tests
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
  inference:
  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
    provider_type: remote::cerebras
    config:
      base_url: https://api.cerebras.ai
      api_key: ${env.CEREBRAS_API_KEY:=}
  - provider_id: ${env.OLLAMA_URL:+ollama}
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  - provider_id: ${env.VLLM_URL:+vllm}
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL:=}
      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
      api_token: ${env.VLLM_API_TOKEN:=fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
  - provider_id: ${env.TGI_URL:+tgi}
    provider_type: remote::tgi
    config:
      url: ${env.TGI_URL:=}
  - provider_id: fireworks
    provider_type: remote::fireworks
    config:
      url: https://api.fireworks.ai/inference/v1
      api_key: ${env.FIREWORKS_API_KEY:=}
  - provider_id: together
    provider_type: remote::together
    config:
      url: https://api.together.xyz/v1
      api_key: ${env.TOGETHER_API_KEY:=}
  - provider_id: bedrock
    provider_type: remote::bedrock
    config:
      api_key: ${env.AWS_BEDROCK_API_KEY:=}
      region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
    provider_type: remote::nvidia
    config:
      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
      api_key: ${env.NVIDIA_API_KEY:=}
      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
  - provider_id: openai
    provider_type: remote::openai
    config:
      api_key: ${env.OPENAI_API_KEY:=}
      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
  - provider_id: anthropic
    provider_type: remote::anthropic
    config:
      api_key: ${env.ANTHROPIC_API_KEY:=}
  - provider_id: gemini
    provider_type: remote::gemini
    config:
      api_key: ${env.GEMINI_API_KEY:=}
  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
    provider_type: remote::vertexai
    config:
      project: ${env.VERTEX_AI_PROJECT:=}
      location: ${env.VERTEX_AI_LOCATION:=us-central1}
  - provider_id: groq
    provider_type: remote::groq
    config:
      url: https://api.groq.com
      api_key: ${env.GROQ_API_KEY:=}
  - provider_id: sambanova
    provider_type: remote::sambanova
    config:
      url: https://api.sambanova.ai/v1
      api_key: ${env.SAMBANOVA_API_KEY:=}
  - provider_id: ${env.AZURE_API_KEY:+azure}
    provider_type: remote::azure
    config:
      api_key: ${env.AZURE_API_KEY:=}
      api_base: ${env.AZURE_API_BASE:=}
      api_version: ${env.AZURE_API_VERSION:=}
      api_type: ${env.AZURE_API_TYPE:=}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      persistence:
        namespace: vector_io::faiss
        backend: kv_default
  - provider_id: sqlite-vec
    provider_type: inline::sqlite-vec
    config:
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
      persistence:
        namespace: vector_io::sqlite_vec
        backend: kv_default
  - provider_id: ${env.MILVUS_URL:+milvus}
    provider_type: inline::milvus
    config:
      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
      persistence:
        namespace: vector_io::milvus
        backend: kv_default
  - provider_id: ${env.CHROMADB_URL:+chromadb}
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL:=}
      persistence:
        namespace: vector_io::chroma_remote
        backend: kv_default
  - provider_id: ${env.PGVECTOR_DB:+pgvector}
    provider_type: remote::pgvector
    config:
      host: ${env.PGVECTOR_HOST:=localhost}
      port: ${env.PGVECTOR_PORT:=5432}
      db: ${env.PGVECTOR_DB:=}
      user: ${env.PGVECTOR_USER:=}
      password: ${env.PGVECTOR_PASSWORD:=}
      persistence:
        namespace: vector_io::pgvector
        backend: kv_default
  - provider_id: ${env.QDRANT_URL:+qdrant}
    provider_type: remote::qdrant
    config:
      api_key: ${env.QDRANT_API_KEY:=}
      persistence:
        namespace: vector_io::qdrant_remote
        backend: kv_default
  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
    provider_type: remote::weaviate
    config:
      weaviate_api_key: null
      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
      persistence:
        namespace: vector_io::weaviate
        backend: kv_default
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
      metadata_store:
        table_name: files_metadata
        backend: sql_default
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  - provider_id: code-scanner
    provider_type: inline::code-scanner
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence:
        agent_state:
          namespace: agents
          backend: kv_default
        responses:
          table_name: responses
          backend: sql_default
          max_write_queue_size: 10000
          num_writers: 4
  post_training:
  - provider_id: torchtune-cpu
    provider_type: inline::torchtune-cpu
    config:
      checkpoint_format: meta
  eval:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      kvstore:
        namespace: eval
        backend: kv_default
  datasetio:
  - provider_id: huggingface
    provider_type: remote::huggingface
    config:
      kvstore:
        namespace: datasetio::huggingface
        backend: kv_default
  - provider_id: localfs
    provider_type: inline::localfs
    config:
      kvstore:
        namespace: datasetio::localfs
        backend: kv_default
  scoring:
  - provider_id: basic
    provider_type: inline::basic
  - provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - provider_id: braintrust
    provider_type: inline::braintrust
    config:
      openai_api_key: ${env.OPENAI_API_KEY:=}
  tool_runtime:
  - provider_id: brave-search
    provider_type: remote::brave-search
    config:
      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: tavily-search
    provider_type: remote::tavily-search
    config:
      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
      max_results: 3
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
  - provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
  batches:
  - provider_id: reference
    provider_type: inline::reference
    config:
      kvstore:
        namespace: batches
        backend: kv_default
storage:
  backends:
    kv_default:
      type: kv_postgres
      host: ${env.POSTGRES_HOST:=localhost}
      port: ${env.POSTGRES_PORT:=5432}
      db: ${env.POSTGRES_DB:=llamastack}
      user: ${env.POSTGRES_USER:=llamastack}
      password: ${env.POSTGRES_PASSWORD:=llamastack}
      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
    sql_default:
      type: sql_postgres
      host: ${env.POSTGRES_HOST:=localhost}
      port: ${env.POSTGRES_PORT:=5432}
      db: ${env.POSTGRES_DB:=llamastack}
      user: ${env.POSTGRES_USER:=llamastack}
      password: ${env.POSTGRES_PASSWORD:=llamastack}
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      table_name: openai_conversations
      backend: sql_default
    prompts:
      namespace: prompts
      backend: kv_default
registered_resources:
  models: []
  shields:
  - shield_id: llama-guard
    provider_id: ${env.SAFETY_MODEL:+llama-guard}
    provider_shield_id: ${env.SAFETY_MODEL:=}
  - shield_id: code-scanner
    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
  vector_dbs: []
  datasets: []
  scoring_fns: []
  benchmarks: []
  tool_groups:
  - toolgroup_id: builtin::websearch
    provider_id: tavily-search
  - toolgroup_id: builtin::rag
    provider_id: rag-runtime
server:
  port: 8321
telemetry:
  enabled: true
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: sentence-transformers
    model_id: nomic-ai/nomic-embed-text-v1.5
safety:
  default_shield_id: llama-guard
@@ -165,20 +165,15 @@ providers:
   - provider_id: meta-reference
     provider_type: inline::meta-reference
     config:
-      persistence_store:
-        type: sql_postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
-      responses_store:
-        type: sql_postgres
-        host: ${env.POSTGRES_HOST:=localhost}
-        port: ${env.POSTGRES_PORT:=5432}
-        db: ${env.POSTGRES_DB:=llamastack}
-        user: ${env.POSTGRES_USER:=llamastack}
-        password: ${env.POSTGRES_PASSWORD:=llamastack}
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
   post_training:
   - provider_id: huggingface-gpu
     provider_type: inline::huggingface-gpu

@@ -237,10 +232,10 @@ providers:
     config:
       kvstore:
         namespace: batches
-        backend: kv_postgres
+        backend: kv_default
 storage:
   backends:
-    kv_postgres:
+    kv_default:
       type: kv_postgres
       host: ${env.POSTGRES_HOST:=localhost}
       port: ${env.POSTGRES_PORT:=5432}

@@ -248,7 +243,7 @@ storage:
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
       table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
-    sql_postgres:
+    sql_default:
       type: sql_postgres
       host: ${env.POSTGRES_HOST:=localhost}
       port: ${env.POSTGRES_PORT:=5432}

@@ -258,27 +253,44 @@ storage:
   stores:
     metadata:
       namespace: registry
-      backend: kv_postgres
+      backend: kv_default
     inference:
       table_name: inference_store
-      backend: sql_postgres
+      backend: sql_default
       max_write_queue_size: 10000
       num_writers: 4
     conversations:
       table_name: openai_conversations
-      backend: sql_postgres
+      backend: sql_default
     prompts:
       namespace: prompts
-      backend: kv_postgres
+      backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
-  tool_groups: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
@ -165,20 +165,15 @@ providers:
|
||||||
- provider_id: meta-reference
|
- provider_id: meta-reference
|
||||||
provider_type: inline::meta-reference
|
provider_type: inline::meta-reference
|
||||||
config:
|
config:
|
||||||
persistence_store:
|
persistence:
|
||||||
type: sql_postgres
|
agent_state:
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
namespace: agents
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
backend: kv_default
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
responses:
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
table_name: responses
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
backend: sql_default
|
||||||
responses_store:
|
max_write_queue_size: 10000
|
||||||
type: sql_postgres
|
num_writers: 4
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
|
||||||
db: ${env.POSTGRES_DB:=llamastack}
|
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
||||||
post_training:
|
post_training:
|
||||||
- provider_id: torchtune-cpu
|
- provider_id: torchtune-cpu
|
||||||
provider_type: inline::torchtune-cpu
|
provider_type: inline::torchtune-cpu
|
||||||
|
|
@ -234,10 +229,10 @@ providers:
|
||||||
config:
|
config:
|
||||||
kvstore:
|
kvstore:
|
||||||
namespace: batches
|
namespace: batches
|
||||||
backend: kv_postgres
|
backend: kv_default
|
||||||
storage:
|
storage:
|
||||||
backends:
|
backends:
|
||||||
kv_postgres:
|
kv_default:
|
||||||
type: kv_postgres
|
type: kv_postgres
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
|
@ -245,7 +240,7 @@ storage:
|
||||||
user: ${env.POSTGRES_USER:=llamastack}
|
user: ${env.POSTGRES_USER:=llamastack}
|
||||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||||
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
||||||
sql_postgres:
|
sql_default:
|
||||||
type: sql_postgres
|
type: sql_postgres
|
||||||
host: ${env.POSTGRES_HOST:=localhost}
|
host: ${env.POSTGRES_HOST:=localhost}
|
||||||
port: ${env.POSTGRES_PORT:=5432}
|
port: ${env.POSTGRES_PORT:=5432}
|
||||||
|
|
@@ -255,27 +250,44 @@ storage:
   stores:
     metadata:
       namespace: registry
-      backend: kv_postgres
+      backend: kv_default
    inference:
       table_name: inference_store
-      backend: sql_postgres
+      backend: sql_default
       max_write_queue_size: 10000
       num_writers: 4
     conversations:
       table_name: openai_conversations
-      backend: sql_postgres
+      backend: sql_default
     prompts:
       namespace: prompts
-      backend: kv_postgres
+      backend: kv_default
 registered_resources:
   models: []
-  shields: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
   vector_dbs: []
   datasets: []
   scoring_fns: []
   benchmarks: []
-  tool_groups: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
 server:
   port: 8321
 telemetry:
   enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
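The net effect of the run.yaml hunks above: a store entry no longer embeds Postgres connection details, it only names a backend that is defined once under storage.backends. For reference, a minimal sketch of that indirection using the KVStoreReference type that appears elsewhere in this diff (the printed dict shape is an assumption):

from llama_stack.core.storage.datatypes import KVStoreReference

# "kv_default" is resolved against storage.backends at runtime, so the same
# store definition works for both the SQLite and the Postgres run configs.
metadata_ref = KVStoreReference(backend="kv_default", namespace="registry")
print(metadata_ref.model_dump(exclude_none=True))
# e.g. {'backend': 'kv_default', 'namespace': 'registry'}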
@@ -17,11 +17,6 @@ from llama_stack.core.datatypes import (
     ToolGroupInput,
     VectorStoresConfig,
 )
-from llama_stack.core.storage.datatypes import (
-    InferenceStoreReference,
-    KVStoreReference,
-    SqlStoreReference,
-)
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
 from llama_stack.providers.datatypes import RemoteProviderSpec
@ -157,10 +152,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
BuildProvider(provider_type="inline::reference"),
|
BuildProvider(provider_type="inline::reference"),
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
|
||||||
files_provider = Provider(
|
files_provider = Provider(
|
||||||
provider_id="meta-reference-files",
|
provider_id="meta-reference-files",
|
||||||
provider_type="inline::localfs",
|
provider_type="inline::localfs",
|
||||||
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
config=files_config,
|
||||||
)
|
)
|
||||||
embedding_provider = Provider(
|
embedding_provider = Provider(
|
||||||
provider_id="sentence-transformers",
|
provider_id="sentence-transformers",
|
||||||
|
|
@ -190,7 +186,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
|
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
postgres_config = PostgresSqlStoreConfig.sample_run_config()
|
postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
|
||||||
|
postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
|
||||||
default_overrides = {
|
default_overrides = {
|
||||||
"inference": remote_inference_providers + [embedding_provider],
|
"inference": remote_inference_providers + [embedding_provider],
|
||||||
"vector_io": [
|
"vector_io": [
|
||||||
|
|
@ -247,16 +244,7 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
"files": [files_provider],
|
"files": [files_provider],
|
||||||
}
|
}
|
||||||
|
|
||||||
return DistributionTemplate(
|
base_run_settings = RunConfigSettings(
|
||||||
name=name,
|
|
||||||
distro_type="self_hosted",
|
|
||||||
description="Quick start template for running Llama Stack with several popular providers. This distribution is intended for CPU-only environments.",
|
|
||||||
container_image=None,
|
|
||||||
template_path=None,
|
|
||||||
providers=providers,
|
|
||||||
additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
|
|
||||||
run_configs={
|
|
||||||
"run.yaml": RunConfigSettings(
|
|
||||||
provider_overrides=default_overrides,
|
provider_overrides=default_overrides,
|
||||||
default_models=[],
|
default_models=[],
|
||||||
default_tool_groups=default_tool_groups,
|
default_tool_groups=default_tool_groups,
|
||||||
|
|
@ -271,56 +259,29 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
||||||
safety_config=SafetyConfig(
|
safety_config=SafetyConfig(
|
||||||
default_shield_id="llama-guard",
|
default_shield_id="llama-guard",
|
||||||
),
|
),
|
||||||
),
|
|
||||||
"run-with-postgres-store.yaml": RunConfigSettings(
|
|
||||||
provider_overrides={
|
|
||||||
**default_overrides,
|
|
||||||
"agents": [
|
|
||||||
Provider(
|
|
||||||
provider_id="meta-reference",
|
|
||||||
provider_type="inline::meta-reference",
|
|
||||||
config=dict(
|
|
||||||
persistence_store=postgres_config,
|
|
||||||
responses_store=postgres_config,
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
],
|
|
||||||
"batches": [
|
postgres_run_settings = base_run_settings.model_copy(
|
||||||
Provider(
|
update={
|
||||||
provider_id="reference",
|
"storage_backends": {
|
||||||
provider_type="inline::reference",
|
"kv_default": postgres_kv_config,
|
||||||
config=dict(
|
"sql_default": postgres_sql_config,
|
||||||
kvstore=KVStoreReference(
|
}
|
||||||
backend="kv_postgres",
|
},
|
||||||
namespace="batches",
|
deep=True,
|
||||||
).model_dump(exclude_none=True),
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
],
|
|
||||||
},
|
return DistributionTemplate(
|
||||||
storage_backends={
|
name=name,
|
||||||
"kv_postgres": PostgresKVStoreConfig.sample_run_config(),
|
distro_type="self_hosted",
|
||||||
"sql_postgres": postgres_config,
|
description="Quick start template for running Llama Stack with several popular providers. This distribution is intended for CPU-only environments.",
|
||||||
},
|
container_image=None,
|
||||||
storage_stores={
|
template_path=None,
|
||||||
"metadata": KVStoreReference(
|
providers=providers,
|
||||||
backend="kv_postgres",
|
additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
|
||||||
namespace="registry",
|
run_configs={
|
||||||
).model_dump(exclude_none=True),
|
"run.yaml": base_run_settings,
|
||||||
"inference": InferenceStoreReference(
|
"run-with-postgres-store.yaml": postgres_run_settings,
|
||||||
backend="sql_postgres",
|
|
||||||
table_name="inference_store",
|
|
||||||
).model_dump(exclude_none=True),
|
|
||||||
"conversations": SqlStoreReference(
|
|
||||||
backend="sql_postgres",
|
|
||||||
table_name="openai_conversations",
|
|
||||||
).model_dump(exclude_none=True),
|
|
||||||
"prompts": KVStoreReference(
|
|
||||||
backend="kv_postgres",
|
|
||||||
namespace="prompts",
|
|
||||||
).model_dump(exclude_none=True),
|
|
||||||
},
|
|
||||||
),
|
|
||||||
},
|
},
|
||||||
run_config_env_vars={
|
run_config_env_vars={
|
||||||
"LLAMA_STACK_PORT": (
|
"LLAMA_STACK_PORT": (
|
||||||
|
|
|
||||||
|
|
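The refactor above leans on Pydantic's model_copy to derive the Postgres variant from the base settings instead of spelling out a second RunConfigSettings. A minimal sketch of that pattern with a stand-in model (RunSettings below is an assumption, not the real class):

from pydantic import BaseModel

class RunSettings(BaseModel):  # stand-in for the real RunConfigSettings
    storage_backends: dict[str, dict] = {}
    provider_overrides: dict[str, list] = {}

base = RunSettings(provider_overrides={"inference": []})
# deep=True clones nested containers, so mutating the Postgres variant's
# backends can never leak back into the base (SQLite) settings by aliasing.
postgres = base.model_copy(
    update={"storage_backends": {"kv_default": {"type": "kv_postgres"}}},
    deep=True,
)
assert base.storage_backends == {}
assert postgres.storage_backends["kv_default"]["type"] == "kv_postgres"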
@@ -66,14 +66,6 @@ class InferenceStore:
             },
         )

-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
         if not self._worker_tasks:
             return
@@ -94,10 +86,29 @@ class InferenceStore:
         if self.enable_write_queue and self._queue is not None:
             await self._queue.join()

+    async def _ensure_workers_started(self) -> None:
+        """Ensure the async write queue workers run on the current loop."""
+        if not self.enable_write_queue:
+            return
+
+        if self._queue is None:
+            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
+            logger.debug(
+                f"Inference store write queue created with max size {self._max_write_queue_size} "
+                f"and {self._num_writers} writers"
+            )
+
+        if not self._worker_tasks:
+            loop = asyncio.get_running_loop()
+            for _ in range(self._num_writers):
+                task = loop.create_task(self._worker_loop())
+                self._worker_tasks.append(task)
+
     async def store_chat_completion(
         self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
     ) -> None:
         if self.enable_write_queue:
+            await self._ensure_workers_started()
             if self._queue is None:
                 raise ValueError("Inference store is not initialized")
             try:
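The hunk above moves worker startup from initialize() to the first write, so the tasks are created on whichever event loop is actually running. A self-contained sketch of the same lazy-start pattern (the LazyQueueWriter class is illustrative, not the real InferenceStore):

import asyncio

class LazyQueueWriter:
    def __init__(self, num_writers: int = 2, maxsize: int = 100) -> None:
        # Nothing loop-bound is created here, so the object can be built
        # before any event loop exists.
        self._queue: asyncio.Queue[str] | None = None
        self._workers: list[asyncio.Task] = []
        self._num_writers = num_writers
        self._maxsize = maxsize

    async def _ensure_workers(self) -> None:
        if self._queue is None:
            self._queue = asyncio.Queue(maxsize=self._maxsize)
        if not self._workers:
            loop = asyncio.get_running_loop()
            self._workers = [loop.create_task(self._drain()) for _ in range(self._num_writers)]

    async def _drain(self) -> None:
        assert self._queue is not None
        while True:
            item = await self._queue.get()
            try:
                print("wrote", item)
            finally:
                self._queue.task_done()

    async def write(self, item: str) -> None:
        await self._ensure_workers()
        assert self._queue is not None
        await self._queue.put(item)

async def main() -> None:
    writer = LazyQueueWriter()
    await writer.write("hello")
    await writer._queue.join()  # wait for the worker to finish the write
    for task in writer._workers:
        task.cancel()

asyncio.run(main())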
@@ -11,6 +11,9 @@

 from __future__ import annotations

+import asyncio
+from collections import defaultdict
+
 from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType

 from .api import KVStore
@@ -53,24 +56,41 @@ class InmemoryKVStoreImpl(KVStore):


 _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
+_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
+_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)


 def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     """Register the set of available KV store backends for reference resolution."""
     global _KVSTORE_BACKENDS
+    global _KVSTORE_INSTANCES
+    global _KVSTORE_LOCKS

     _KVSTORE_BACKENDS.clear()
+    _KVSTORE_INSTANCES.clear()
+    _KVSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _KVSTORE_BACKENDS[name] = cfg


 async def kvstore_impl(reference: KVStoreReference) -> KVStore:
     backend_name = reference.backend
+    cache_key = (backend_name, reference.namespace)
+
+    existing = _KVSTORE_INSTANCES.get(cache_key)
+    if existing:
+        return existing
+
     backend_config = _KVSTORE_BACKENDS.get(backend_name)
     if backend_config is None:
         raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")

+    lock = _KVSTORE_LOCKS[cache_key]
+    async with lock:
+        existing = _KVSTORE_INSTANCES.get(cache_key)
+        if existing:
+            return existing
+
     config = backend_config.model_copy()
     config.namespace = reference.namespace
@@ -94,4 +114,5 @@ async def kvstore_impl(reference: KVStoreReference) -> KVStore:
         raise ValueError(f"Unknown kvstore type {config.type}")

     await impl.initialize()
+    _KVSTORE_INSTANCES[cache_key] = impl
     return impl
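The caching in kvstore_impl above is the async double-checked locking pattern: a lock-free fast path, then a per-key lock with a re-check so concurrent callers construct each store exactly once. A runnable sketch of just that pattern (get_store and the object() placeholder are illustrative):

import asyncio
from collections import defaultdict

_instances: dict[tuple[str, str], object] = {}
_locks: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)

async def get_store(backend: str, namespace: str) -> object:
    key = (backend, namespace)
    # Fast path: no lock needed once the instance exists.
    if (existing := _instances.get(key)) is not None:
        return existing
    # Slow path: per-key lock plus a re-check, so two coroutines racing on
    # the same key construct (and initialize) the store exactly once.
    async with _locks[key]:
        if (existing := _instances.get(key)) is not None:
            return existing
        instance = object()  # stand-in for real backend construction + initialize()
        _instances[key] = instance
        return instance

async def main() -> None:
    a, b = await asyncio.gather(
        get_store("kv_default", "registry"),
        get_store("kv_default", "registry"),
    )
    assert a is b  # both callers share one instance

asyncio.run(main())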
@@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC):
             # Unknown filter type, default to no match
             raise ValueError(f"Unsupported filter type: {filter_type}")

-    def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
-        # content is InterleavedContent
+    def _chunk_to_vector_store_content(
+        self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
+    ) -> list[VectorStoreContent]:
+        def extract_fields() -> dict:
+            """Extract embedding and metadata fields from chunk based on include flags."""
+            return {
+                "embedding": chunk.embedding if include_embeddings else None,
+                "chunk_metadata": chunk.chunk_metadata if include_metadata else None,
+                "metadata": chunk.metadata if include_metadata else None,
+            }
+
+        fields = extract_fields()
+
         if isinstance(chunk.content, str):
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=chunk.content,
-                )
-            ]
+            content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
+            content = [content_item]
         elif isinstance(chunk.content, list):
             # TODO: Add support for other types of content
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=item.text,
-                )
-                for item in chunk.content
-                if item.type == "text"
-            ]
+            content = []
+            for item in chunk.content:
+                if item.type == "text":
+                    content_item = VectorStoreContent(type="text", text=item.text, **fields)
+                    content.append(content_item)
         else:
             if chunk.content.type != "text":
                 raise ValueError(f"Unsupported content type: {chunk.content.type}")
-            content = [
-                VectorStoreContent(
-                    type="text",
-                    text=chunk.content.text,
-                )
-            ]
+            content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields)
+            content = [content_item]
         return content

     async def openai_attach_file_to_vector_store(
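The extract_fields() helper above computes the optional fields once and splats them into every constructed item. A small self-contained toy of the same idea (Content and to_content are illustrative stand-ins, not the real VectorStoreContent API):

from dataclasses import dataclass

@dataclass
class Content:
    text: str
    embedding: list[float] | None = None
    metadata: dict | None = None

def to_content(
    text: str,
    embedding: list[float],
    metadata: dict,
    include_embeddings: bool = False,
    include_metadata: bool = False,
) -> Content:
    # Compute the optional fields once, then splat them into every item,
    # mirroring the extract_fields() helper in the hunk above.
    fields = {
        "embedding": embedding if include_embeddings else None,
        "metadata": metadata if include_metadata else None,
    }
    return Content(text=text, **fields)

print(to_content("hello", [0.1, 0.2], {"doc": "a"}, include_embeddings=True))
# Content(text='hello', embedding=[0.1, 0.2], metadata=None)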
@@ -820,13 +821,12 @@ class OpenAIVectorStoreMixin(ABC):
                 message=str(e),
             )

-        # Create OpenAI vector store file metadata
+        # Save vector store file to persistent storage AFTER insert_chunks
+        # so that chunks include the embeddings that were generated
         file_info = vector_store_file_object.model_dump(exclude={"last_error"})
         file_info["filename"] = file_response.filename if file_response else ""
-
-        # Save vector store file to persistent storage (provider-specific)
         dict_chunks = [c.model_dump() for c in chunks]
-        # This should be updated to include chunk_id
         await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)

         # Update file_ids and file_counts in vector store metadata
@@ -921,21 +921,27 @@ class OpenAIVectorStoreMixin(ABC):
         self,
         vector_store_id: str,
         file_id: str,
+        include_embeddings: bool | None = False,
+        include_metadata: bool | None = False,
     ) -> VectorStoreFileContentResponse:
         """Retrieves the contents of a vector store file."""
         if vector_store_id not in self.openai_vector_stores:
             raise VectorStoreNotFoundError(vector_store_id)

+        # Parameters are already provided directly
+        # include_embeddings and include_metadata are now function parameters
+
         dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
         chunks = [Chunk.model_validate(c) for c in dict_chunks]
         content = []
         for chunk in chunks:
-            content.extend(self._chunk_to_vector_store_content(chunk))
+            content.extend(
+                self._chunk_to_vector_store_content(
+                    chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
+                )
+            )
         return VectorStoreFileContentResponse(
-            object="vector_store.file_content.page",
             data=content,
-            has_more=False,
-            next_page=None,
         )

     async def openai_update_vector_store_file(
@@ -3,8 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import asyncio
-from typing import Any

 from llama_stack.apis.agents import (
     Order,
@@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import (
 )
 from llama_stack.apis.inference import OpenAIMessageParam
 from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
 from llama_stack.log import get_logger

 from ..sqlstore.api import ColumnDefinition, ColumnType
 from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from ..sqlstore.sqlstore import sqlstore_impl

 logger = get_logger(name=__name__, category="openai_responses")
@@ -55,28 +53,12 @@ class ResponsesStore:
         self.policy = policy
         self.sql_store = None
-        self.enable_write_queue = True
-
-        # Async write queue and worker control
-        self._queue: (
-            asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None
-        ) = None
-        self._worker_tasks: list[asyncio.Task[Any]] = []
-        self._max_write_queue_size: int = self.reference.max_write_queue_size
-        self._num_writers: int = max(1, self.reference.num_writers)

     async def initialize(self):
         """Create the necessary tables if they don't exist."""
         base_store = sqlstore_impl(self.reference)
         self.sql_store = AuthorizedSqlStore(base_store, self.policy)
-
-        # Disable write queue for SQLite since WAL mode handles concurrency
-        # Keep it enabled for other backends (like Postgres) for performance
-        backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
-        if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
-            self.enable_write_queue = False
-            logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")

         await self.sql_store.create_table(
             "openai_responses",
             {
@@ -95,33 +77,12 @@ class ResponsesStore:
             },
         )

-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
     async def shutdown(self) -> None:
-        if not self._worker_tasks:
-            return
-        if self._queue is not None:
-            await self._queue.join()
-        for t in self._worker_tasks:
-            if not t.done():
-                t.cancel()
-        for t in self._worker_tasks:
-            try:
-                await t
-            except asyncio.CancelledError:
-                pass
-        self._worker_tasks.clear()
+        return

     async def flush(self) -> None:
-        """Wait for all queued writes to complete. Useful for testing."""
-        if self.enable_write_queue and self._queue is not None:
-            await self._queue.join()
+        """Maintained for compatibility; no-op now that writes are synchronous."""
+        return

     async def store_response_object(
         self,
@@ -129,32 +90,8 @@ class ResponsesStore:
         input: list[OpenAIResponseInput],
         messages: list[OpenAIMessageParam],
     ) -> None:
-        if self.enable_write_queue:
-            if self._queue is None:
-                raise ValueError("Responses store is not initialized")
-            try:
-                self._queue.put_nowait((response_object, input, messages))
-            except asyncio.QueueFull:
-                logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '<unknown>')}")
-                await self._queue.put((response_object, input, messages))
-        else:
-            await self._write_response_object(response_object, input, messages)
-
-    async def _worker_loop(self) -> None:
-        assert self._queue is not None
-        while True:
-            try:
-                item = await self._queue.get()
-            except asyncio.CancelledError:
-                break
-            response_object, input, messages = item
-            try:
-                await self._write_response_object(response_object, input, messages)
-            except Exception as e:  # noqa: BLE001
-                logger.error(f"Error writing response object: {e}")
-            finally:
-                self._queue.task_done()
+        await self._write_response_object(response_object, input, messages)

     async def _write_response_object(
         self,
         response_object: OpenAIResponseObject,
@@ -315,18 +252,11 @@ class ResponsesStore:
         # Serialize messages to dict format for JSON storage
         messages_data = [msg.model_dump() for msg in messages]

-        # Upsert: try insert first, update if exists
-        try:
-            await self.sql_store.insert(
-                table="conversation_messages",
-                data={"conversation_id": conversation_id, "messages": messages_data},
-            )
-        except Exception:
-            # If insert fails due to ID conflict, update existing record
-            await self.sql_store.update(
-                table="conversation_messages",
-                data={"messages": messages_data},
-                where={"conversation_id": conversation_id},
-            )
+        await self.sql_store.upsert(
+            table="conversation_messages",
+            data={"conversation_id": conversation_id, "messages": messages_data},
+            conflict_columns=["conversation_id"],
+            update_columns=["messages"],
+        )

         logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")
@@ -47,6 +47,18 @@ class SqlStore(Protocol):
         """
         pass

+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        """
+        Insert a row and update specified columns when conflicts occur.
+        """
+        pass
+
     async def fetch_all(
         self,
         table: str,
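The protocol contract above: conflict_columns define row identity, and update_columns (defaulting to every non-key column) are refreshed on conflict. A runnable toy implementation of that contract (InMemoryStore is illustrative only, not part of the codebase):

import asyncio
from collections.abc import Mapping
from typing import Any

class InMemoryStore:
    def __init__(self) -> None:
        self.rows: dict[tuple, dict[str, Any]] = {}

    async def upsert(
        self,
        table: str,
        data: Mapping[str, Any],
        conflict_columns: list[str],
        update_columns: list[str] | None = None,
    ) -> None:
        # Identity of a row is the value of its conflict columns.
        key = (table, *(data[c] for c in conflict_columns))
        if update_columns is None:
            # Default: every non-key column is refreshed on conflict.
            update_columns = [c for c in data if c not in conflict_columns]
        row = self.rows.setdefault(key, dict(data))
        for col in update_columns:
            row[col] = data[col]

async def main() -> None:
    store = InMemoryStore()
    await store.upsert("conv", {"conversation_id": "c1", "messages": ["hi"]}, ["conversation_id"], ["messages"])
    await store.upsert("conv", {"conversation_id": "c1", "messages": ["hi", "there"]}, ["conversation_id"], ["messages"])
    assert len(store.rows) == 1  # second call updated, did not duplicate

asyncio.run(main())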
@@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use
         enhanced["owner_principal"] = current_user.principal
         enhanced["access_attributes"] = current_user.attributes
     else:
-        enhanced["owner_principal"] = None
-        enhanced["access_attributes"] = None
+        # IMPORTANT: Use empty string and null value (not None) to match public access filter
+        # The public access filter in _get_public_access_conditions() expects:
+        # - owner_principal = '' (empty string)
+        # - access_attributes = null (JSON null, which serializes to the string 'null')
+        # Setting them to None (SQL NULL) will cause rows to be filtered out on read.
+        enhanced["owner_principal"] = ""
+        enhanced["access_attributes"] = None  # Pydantic/JSON will serialize this as JSON null
     return enhanced
@@ -124,6 +129,23 @@ class AuthorizedSqlStore:
         enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data]
         await self.sql_store.insert(table, enhanced_data)

+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        """Upsert a row with automatic access control attribute capture."""
+        current_user = get_authenticated_user()
+        enhanced_data = _enhance_item_with_access_control(data, current_user)
+        await self.sql_store.upsert(
+            table=table,
+            data=enhanced_data,
+            conflict_columns=conflict_columns,
+            update_columns=update_columns,
+        )
+
     async def fetch_all(
         self,
         table: str,
@@ -188,8 +210,9 @@ class AuthorizedSqlStore:
             enhanced_data["owner_principal"] = current_user.principal
             enhanced_data["access_attributes"] = current_user.attributes
         else:
-            enhanced_data["owner_principal"] = None
-            enhanced_data["access_attributes"] = None
+            # IMPORTANT: Use empty string for owner_principal to match public access filter
+            enhanced_data["owner_principal"] = ""
+            enhanced_data["access_attributes"] = None  # Will serialize as JSON null

         await self.sql_store.update(table, enhanced_data, where)
@@ -245,14 +268,24 @@ class AuthorizedSqlStore:
             raise ValueError(f"Unsupported database type: {self.database_type}")

     def _get_public_access_conditions(self) -> list[str]:
-        """Get the SQL conditions for public access."""
-        # Public records are records that have no owner_principal or access_attributes
+        """Get the SQL conditions for public access.
+
+        Public records are those with:
+        - owner_principal = '' (empty string)
+        - access_attributes is either SQL NULL or JSON null
+
+        Note: Different databases serialize None differently:
+        - SQLite: None → JSON null (text = 'null')
+        - Postgres: None → SQL NULL (IS NULL)
+        """
         conditions = ["owner_principal = ''"]
         if self.database_type == StorageBackendType.SQL_POSTGRES.value:
-            # Postgres stores JSON null as 'null'
-            conditions.append("access_attributes::text = 'null'")
+            # Accept both SQL NULL and JSON null for Postgres compatibility
+            # This handles both old rows (SQL NULL) and new rows (JSON null)
+            conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
         elif self.database_type == StorageBackendType.SQL_SQLITE.value:
-            conditions.append("access_attributes = 'null'")
+            # SQLite serializes None as JSON null
+            conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
         else:
             raise ValueError(f"Unsupported database type: {self.database_type}")
         return conditions
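Why the lenient filter above matters: legacy rows stored SQL NULL while new rows store the JSON-null text 'null', and a strict equality check silently drops the legacy rows. A runnable demonstration with the standard library's sqlite3 (table and column names are illustrative):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE t (owner_principal TEXT, access_attributes TEXT)")
conn.execute("INSERT INTO t VALUES ('', NULL)")    # legacy public row (SQL NULL)
conn.execute("INSERT INTO t VALUES ('', 'null')")  # new public row (JSON null as text)

strict = conn.execute(
    "SELECT COUNT(*) FROM t WHERE owner_principal = '' AND access_attributes = 'null'"
).fetchone()[0]
lenient = conn.execute(
    "SELECT COUNT(*) FROM t WHERE owner_principal = '' "
    "AND (access_attributes IS NULL OR access_attributes = 'null')"
).fetchone()[0]
print(strict, lenient)  # 1 2 -> the IS NULL form also matches legacy rows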
@@ -72,13 +72,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
 class SqlAlchemySqlStoreImpl(SqlStore):
     def __init__(self, config: SqlAlchemySqlStoreConfig):
         self.config = config
+        self._is_sqlite_backend = "sqlite" in self.config.engine_str
         self.async_session = async_sessionmaker(self.create_engine())
         self.metadata = MetaData()

     def create_engine(self) -> AsyncEngine:
         # Configure connection args for better concurrency support
         connect_args = {}
-        if "sqlite" in self.config.engine_str:
+        if self._is_sqlite_backend:
             # SQLite-specific optimizations for concurrent access
             # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases
             connect_args["timeout"] = 5.0
@@ -91,7 +92,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
         )

         # Enable WAL mode for SQLite to support concurrent readers and writers
-        if "sqlite" in self.config.engine_str:
+        if self._is_sqlite_backend:

             @event.listens_for(engine.sync_engine, "connect")
             def set_sqlite_pragma(dbapi_conn, connection_record):
@@ -151,6 +152,29 @@ class SqlAlchemySqlStoreImpl(SqlStore):
             await session.execute(self.metadata.tables[table].insert(), data)
             await session.commit()

+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        table_obj = self.metadata.tables[table]
+        dialect_insert = self._get_dialect_insert(table_obj)
+        insert_stmt = dialect_insert.values(**data)
+
+        if update_columns is None:
+            update_columns = [col for col in data.keys() if col not in conflict_columns]
+
+        update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns}
+        conflict_cols = [table_obj.c[col] for col in conflict_columns]
+
+        stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping)
+
+        async with self.async_session() as session:
+            await session.execute(stmt)
+            await session.commit()
+
     async def fetch_all(
         self,
         table: str,
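The upsert implementation above relies on dialect-specific INSERT ... ON CONFLICT DO UPDATE support. A minimal runnable sketch with the SQLite dialect (the Postgres dialect mirrors it); it assumes the aiosqlite driver is installed, and the kv table is illustrative:

import asyncio
from sqlalchemy import Column, MetaData, String, Table
from sqlalchemy.dialects.sqlite import insert
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine

metadata = MetaData()
kv = Table("kv", metadata, Column("k", String, primary_key=True), Column("v", String))

async def main() -> None:
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.begin() as conn:
        await conn.run_sync(metadata.create_all)
    session_factory = async_sessionmaker(engine)

    stmt = insert(kv).values(k="a", v="1")
    # `excluded` refers to the row that failed to insert; reuse its values.
    stmt = stmt.on_conflict_do_update(index_elements=[kv.c.k], set_={"v": stmt.excluded.v})

    async with session_factory() as session:
        await session.execute(stmt)
        await session.execute(stmt)  # second call updates instead of raising
        await session.commit()

asyncio.run(main())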
@@ -333,9 +357,18 @@ class SqlAlchemySqlStoreImpl(SqlStore):
                     add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}")

                     await conn.execute(add_column_sql)

         except Exception as e:
             # If any error occurs during migration, log it but don't fail
             # The table creation will handle adding the column
             logger.error(f"Error adding column {column_name} to table {table}: {e}")
             pass

+    def _get_dialect_insert(self, table: Table):
+        if self._is_sqlite_backend:
+            from sqlalchemy.dialects.sqlite import insert as sqlite_insert
+
+            return sqlite_insert(table)
+        else:
+            from sqlalchemy.dialects.postgresql import insert as pg_insert
+
+            return pg_insert(table)
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from threading import Lock
 from typing import Annotated, cast

 from pydantic import Field
@@ -21,6 +22,8 @@ from .api import SqlStore
 sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]

 _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
+_SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
+_SQLSTORE_LOCKS: dict[str, Lock] = {}


 SqlStoreConfig = Annotated[
@@ -52,11 +55,23 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
             f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
         )

+    existing = _SQLSTORE_INSTANCES.get(backend_name)
+    if existing:
+        return existing
+
+    lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock())
+    with lock:
+        existing = _SQLSTORE_INSTANCES.get(backend_name)
+        if existing:
+            return existing
+
     if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
         from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl

         config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
-        return SqlAlchemySqlStoreImpl(config)
+        instance = SqlAlchemySqlStoreImpl(config)
+        _SQLSTORE_INSTANCES[backend_name] = instance
+        return instance
     else:
         raise ValueError(f"Unknown sqlstore type {backend_config.type}")
@@ -64,7 +79,10 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
 def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     """Register the set of available SQL store backends for reference resolution."""
     global _SQLSTORE_BACKENDS
+    global _SQLSTORE_INSTANCES

     _SQLSTORE_BACKENDS.clear()
+    _SQLSTORE_INSTANCES.clear()
+    _SQLSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _SQLSTORE_BACKENDS[name] = cfg
src/llama_stack_ui/.dockerignore (new file, 20 lines)
@@ -0,0 +1,20 @@
+.git
+.gitignore
+.env.local
+.env.*.local
+.next
+node_modules
+npm-debug.log
+*.md
+.DS_Store
+.vscode
+.idea
+playwright-report
+e2e
+jest.config.ts
+jest.setup.ts
+eslint.config.mjs
+.prettierrc
+.prettierignore
+.nvmrc
+playwright.config.ts
src/llama_stack_ui/Containerfile (new file, 18 lines)
@@ -0,0 +1,18 @@
+FROM node:22.5.1-alpine
+
+ENV NODE_ENV=production
+
+# Install dumb-init for proper signal handling
+RUN apk add --no-cache dumb-init
+
+# Create non-root user for security
+RUN addgroup --system --gid 1001 nodejs
+RUN adduser --system --uid 1001 nextjs
+
+# Install llama-stack-ui from npm
+RUN npm install -g llama-stack-ui
+
+USER nextjs
+
+ENTRYPOINT ["dumb-init", "--"]
+CMD ["llama-stack-ui"]
@@ -8,6 +8,9 @@ import type {
 import { useRouter } from "next/navigation";
 import { usePagination } from "@/hooks/use-pagination";
 import { Button } from "@/components/ui/button";
+import { Plus, Trash2, Search, Edit, X } from "lucide-react";
+import { useState } from "react";
+import { Input } from "@/components/ui/input";
 import {
   Table,
   TableBody,
@@ -17,9 +20,21 @@ import {
   TableRow,
 } from "@/components/ui/table";
 import { Skeleton } from "@/components/ui/skeleton";
+import { useAuthClient } from "@/hooks/use-auth-client";
+import {
+  VectorStoreEditor,
+  VectorStoreFormData,
+} from "@/components/vector-stores/vector-store-editor";

 export default function VectorStoresPage() {
   const router = useRouter();
+  const client = useAuthClient();
+  const [deletingStores, setDeletingStores] = useState<Set<string>>(new Set());
+  const [searchTerm, setSearchTerm] = useState("");
+  const [showVectorStoreModal, setShowVectorStoreModal] = useState(false);
+  const [editingStore, setEditingStore] = useState<VectorStore | null>(null);
+  const [modalError, setModalError] = useState<string | null>(null);
+  const [showSuccessState, setShowSuccessState] = useState(false);
   const {
     data: stores,
     status,
@@ -47,6 +62,142 @@ export default function VectorStoresPage() {
     }
   }, [status, hasMore, loadMore]);

+  // Handle ESC key to close modal
+  React.useEffect(() => {
+    const handleEscape = (event: KeyboardEvent) => {
+      if (event.key === "Escape" && showVectorStoreModal) {
+        handleCancel();
+      }
+    };
+
+    document.addEventListener("keydown", handleEscape);
+    return () => document.removeEventListener("keydown", handleEscape);
+  }, [showVectorStoreModal]);
+
+  const handleDeleteVectorStore = async (storeId: string) => {
+    if (
+      !confirm(
+        "Are you sure you want to delete this vector store? This action cannot be undone."
+      )
+    ) {
+      return;
+    }
+
+    setDeletingStores(prev => new Set([...prev, storeId]));
+
+    try {
+      await client.vectorStores.delete(storeId);
+      // Reload the data to reflect the deletion
+      window.location.reload();
+    } catch (err: unknown) {
+      console.error("Failed to delete vector store:", err);
+      const errorMessage = err instanceof Error ? err.message : "Unknown error";
+      alert(`Failed to delete vector store: ${errorMessage}`);
+    } finally {
+      setDeletingStores(prev => {
+        const newSet = new Set(prev);
+        newSet.delete(storeId);
+        return newSet;
+      });
+    }
+  };
+
+  const handleSaveVectorStore = async (formData: VectorStoreFormData) => {
+    try {
+      setModalError(null);
+
+      if (editingStore) {
+        // Update existing vector store
+        const updateParams: {
+          name?: string;
+          extra_body?: Record<string, unknown>;
+        } = {};
+
+        // Only include fields that have changed or are provided
+        if (formData.name && formData.name !== editingStore.name) {
+          updateParams.name = formData.name;
+        }
+
+        // Add all parameters to extra_body (except provider_id which can't be changed)
+        const extraBody: Record<string, unknown> = {};
+        if (formData.embedding_model) {
+          extraBody.embedding_model = formData.embedding_model;
+        }
+        if (formData.embedding_dimension) {
+          extraBody.embedding_dimension = formData.embedding_dimension;
+        }
+
+        if (Object.keys(extraBody).length > 0) {
+          updateParams.extra_body = extraBody;
+        }
+
+        await client.vectorStores.update(editingStore.id, updateParams);
+
+        // Show success state with close button
+        setShowSuccessState(true);
+        setModalError(
+          "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes."
+        );
+        return;
+      }
+
+      const createParams: {
+        name?: string;
+        provider_id?: string;
+        extra_body?: Record<string, unknown>;
+      } = {
+        name: formData.name || undefined,
+      };
+
+      // Extract provider_id to top-level (like Python client does)
+      if (formData.provider_id) {
+        createParams.provider_id = formData.provider_id;
+      }
+
+      // Add remaining parameters to extra_body
+      const extraBody: Record<string, unknown> = {};
+      if (formData.provider_id) {
+        extraBody.provider_id = formData.provider_id;
+      }
+      if (formData.embedding_model) {
+        extraBody.embedding_model = formData.embedding_model;
+      }
+      if (formData.embedding_dimension) {
+        extraBody.embedding_dimension = formData.embedding_dimension;
+      }
+
+      if (Object.keys(extraBody).length > 0) {
+        createParams.extra_body = extraBody;
+      }
+
+      await client.vectorStores.create(createParams);
+
+      // Show success state with close button
+      setShowSuccessState(true);
+      setModalError(
+        "✅ Vector store created successfully! You can close this modal and refresh the page to see changes."
+      );
+    } catch (err: unknown) {
+      console.error("Failed to create vector store:", err);
+      const errorMessage =
+        err instanceof Error ? err.message : "Failed to create vector store";
+      setModalError(errorMessage);
+    }
+  };
+
+  const handleEditVectorStore = (store: VectorStore) => {
+    setEditingStore(store);
+    setShowVectorStoreModal(true);
+    setModalError(null);
+  };
+
+  const handleCancel = () => {
+    setShowVectorStoreModal(false);
+    setEditingStore(null);
+    setModalError(null);
+    setShowSuccessState(false);
+  };
+
   const renderContent = () => {
     if (status === "loading") {
       return (
@@ -66,7 +217,38 @@ export default function VectorStoresPage() {
       return <p>No vector stores found.</p>;
     }

+    // Filter stores based on search term
+    const filteredStores = stores.filter(store => {
+      if (!searchTerm) return true;
+
+      const searchLower = searchTerm.toLowerCase();
+      return (
+        store.id.toLowerCase().includes(searchLower) ||
+        (store.name && store.name.toLowerCase().includes(searchLower)) ||
+        (store.metadata?.provider_id &&
+          String(store.metadata.provider_id)
+            .toLowerCase()
+            .includes(searchLower)) ||
+        (store.metadata?.provider_vector_db_id &&
+          String(store.metadata.provider_vector_db_id)
+            .toLowerCase()
+            .includes(searchLower))
+      );
+    });
+
     return (
+      <div className="space-y-4">
+        {/* Search Bar */}
+        <div className="relative flex-1 max-w-md">
+          <Search className="absolute left-3 top-1/2 transform -translate-y-1/2 text-muted-foreground h-4 w-4" />
+          <Input
+            placeholder="Search vector stores..."
+            value={searchTerm}
+            onChange={e => setSearchTerm(e.target.value)}
+            className="pl-10"
+          />
+        </div>
+
       <div className="overflow-auto flex-1 min-h-0">
         <Table>
           <TableHeader>
@@ -82,10 +264,11 @@ export default function VectorStoresPage() {
             <TableHead>Usage Bytes</TableHead>
             <TableHead>Provider ID</TableHead>
             <TableHead>Provider Vector DB ID</TableHead>
+            <TableHead>Actions</TableHead>
           </TableRow>
         </TableHeader>
         <TableBody>
-          {stores.map(store => {
+          {filteredStores.map(store => {
             const fileCounts = store.file_counts;
             const metadata = store.metadata || {};
             const providerId = metadata.provider_id ?? "";
@@ -94,7 +277,9 @@ export default function VectorStoresPage() {
             return (
               <TableRow
                 key={store.id}
-                onClick={() => router.push(`/logs/vector-stores/${store.id}`)}
+                onClick={() =>
+                  router.push(`/logs/vector-stores/${store.id}`)
+                }
                 className="cursor-pointer hover:bg-muted/50"
               >
                 <TableCell>
@@ -120,19 +305,102 @@ export default function VectorStoresPage() {
                 <TableCell>{store.usage_bytes}</TableCell>
                 <TableCell>{providerId}</TableCell>
                 <TableCell>{providerDbId}</TableCell>
+                <TableCell>
+                  <div className="flex gap-2">
+                    <Button
+                      variant="outline"
+                      size="sm"
+                      onClick={e => {
+                        e.stopPropagation();
+                        handleEditVectorStore(store);
+                      }}
+                    >
+                      <Edit className="h-4 w-4" />
+                    </Button>
+                    <Button
+                      variant="outline"
+                      size="sm"
+                      onClick={e => {
+                        e.stopPropagation();
+                        handleDeleteVectorStore(store.id);
+                      }}
+                      disabled={deletingStores.has(store.id)}
+                    >
+                      {deletingStores.has(store.id) ? (
+                        "Deleting..."
+                      ) : (
+                        <>
+                          <Trash2 className="h-4 w-4" />
+                        </>
+                      )}
+                    </Button>
+                  </div>
+                </TableCell>
               </TableRow>
             );
           })}
         </TableBody>
       </Table>
     </div>
+      </div>
     );
   };

   return (
     <div className="space-y-4">
+      <div className="flex items-center justify-between">
       <h1 className="text-2xl font-semibold">Vector Stores</h1>
+        <Button
+          onClick={() => setShowVectorStoreModal(true)}
+          disabled={status === "loading"}
+        >
+          <Plus className="h-4 w-4 mr-2" />
+          New Vector Store
+        </Button>
+      </div>
       {renderContent()}
+
+      {/* Create Vector Store Modal */}
+      {showVectorStoreModal && (
+        <div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
+          <div className="bg-background border rounded-lg shadow-lg max-w-2xl w-full mx-4 max-h-[90vh] overflow-hidden">
+            <div className="p-6 border-b flex items-center justify-between">
+              <h2 className="text-2xl font-bold">
+                {editingStore ? "Edit Vector Store" : "Create New Vector Store"}
+              </h2>
+              <Button
+                variant="ghost"
+                size="sm"
+                onClick={handleCancel}
+                className="p-1 h-auto"
+              >
+                <X className="h-4 w-4" />
+              </Button>
+            </div>
+            <div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
+              <VectorStoreEditor
+                onSave={handleSaveVectorStore}
+                onCancel={handleCancel}
+                error={modalError}
+                showSuccessState={showSuccessState}
+                isEditing={!!editingStore}
+                initialData={
+                  editingStore
+                    ? {
+                        name: editingStore.name || "",
+                        embedding_model:
+                          editingStore.metadata?.embedding_model || "",
+                        embedding_dimension:
+                          editingStore.metadata?.embedding_dimension || 768,
+                        provider_id: editingStore.metadata?.provider_id || "",
+                      }
+                    : undefined
+                }
+              />
+            </div>
+          </div>
+        </div>
+      )}
     </div>
   );
 }
src/llama_stack_ui/bin/cli.js (new executable file, 34 lines)
@@ -0,0 +1,34 @@
+#!/usr/bin/env node
+
+const { spawn } = require('child_process');
+const path = require('path');
+
+const port = process.env.LLAMA_STACK_UI_PORT || 8322;
+const uiDir = path.resolve(__dirname, '..');
+const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js');
+const serverDir = path.dirname(serverPath);
+
+console.log(`Starting Llama Stack UI on http://localhost:${port}`);
+
+const child = spawn(process.execPath, [serverPath], {
+  cwd: serverDir,
+  stdio: 'inherit',
+  env: {
+    ...process.env,
+    PORT: port,
+  },
+});
+
+process.on('SIGINT', () => {
+  child.kill('SIGINT');
+  process.exit(0);
+});
+
+process.on('SIGTERM', () => {
+  child.kill('SIGTERM');
+  process.exit(0);
+});
+
+child.on('exit', (code) => {
+  process.exit(code);
+});
@ -2,7 +2,7 @@ import React from "react";
|
||||||
import { render, screen, fireEvent } from "@testing-library/react";
|
import { render, screen, fireEvent } from "@testing-library/react";
|
||||||
import "@testing-library/jest-dom";
|
import "@testing-library/jest-dom";
|
||||||
import { PromptEditor } from "./prompt-editor";
|
import { PromptEditor } from "./prompt-editor";
|
||||||
import type { Prompt, PromptFormData } from "./types";
|
import type { Prompt } from "./types";
|
||||||
|
|
||||||
describe("PromptEditor", () => {
|
describe("PromptEditor", () => {
|
||||||
const mockOnSave = jest.fn();
|
const mockOnSave = jest.fn();
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,20 @@ jest.mock("next/navigation", () => ({
  }),
}));

// Mock NextAuth
jest.mock("next-auth/react", () => ({
  useSession: () => ({
    data: {
      accessToken: "mock-access-token",
      user: {
        id: "mock-user-id",
        email: "test@example.com",
      },
    },
    status: "authenticated",
  }),
}));

describe("VectorStoreDetailView", () => {
  const defaultProps = {
    store: null,
@ -1,16 +1,18 @@
"use client";

import { useRouter } from "next/navigation";
import { useState, useEffect } from "react";
import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
import { Button } from "@/components/ui/button";
import { useAuthClient } from "@/hooks/use-auth-client";
import { Edit2, Trash2, X } from "lucide-react";
import {
  DetailLoadingView,
  DetailErrorView,
  DetailNotFoundView,
  DetailLayout,
  PropertiesCard,
  PropertyItem,
} from "@/components/layout/detail-layout";
@ -23,6 +25,7 @@ import {
  TableHeader,
  TableRow,
} from "@/components/ui/table";
import { VectorStoreEditor, VectorStoreFormData } from "./vector-store-editor";

interface VectorStoreDetailViewProps {
  store: VectorStore | null;
@ -43,21 +46,122 @@ export function VectorStoreDetailView({
  errorFiles,
  id,
}: VectorStoreDetailViewProps) {
  const title = "Vector Store Details";
  const router = useRouter();
  const client = useAuthClient();
  const [isDeleting, setIsDeleting] = useState(false);
  const [showEditModal, setShowEditModal] = useState(false);
  const [modalError, setModalError] = useState<string | null>(null);
  const [showSuccessState, setShowSuccessState] = useState(false);

  // Handle ESC key to close modal
  useEffect(() => {
    const handleEscape = (event: KeyboardEvent) => {
      if (event.key === "Escape" && showEditModal) {
        handleCancel();
      }
    };

    document.addEventListener("keydown", handleEscape);
    return () => document.removeEventListener("keydown", handleEscape);
  }, [showEditModal]);

  const handleFileClick = (fileId: string) => {
    router.push(`/logs/vector-stores/${id}/files/${fileId}`);
  };

  const handleEditVectorStore = () => {
    setShowEditModal(true);
    setModalError(null);
    setShowSuccessState(false);
  };

  const handleCancel = () => {
    setShowEditModal(false);
    setModalError(null);
    setShowSuccessState(false);
  };

  const handleSaveVectorStore = async (formData: VectorStoreFormData) => {
    try {
      setModalError(null);

      // Update existing vector store (same logic as list page)
      const updateParams: {
        name?: string;
        extra_body?: Record<string, unknown>;
      } = {};

      // Only include fields that have changed or are provided
      if (formData.name && formData.name !== store?.name) {
        updateParams.name = formData.name;
      }

      // Add all parameters to extra_body (except provider_id which can't be changed)
      const extraBody: Record<string, unknown> = {};
      if (formData.embedding_model) {
        extraBody.embedding_model = formData.embedding_model;
      }
      if (formData.embedding_dimension) {
        extraBody.embedding_dimension = formData.embedding_dimension;
      }

      if (Object.keys(extraBody).length > 0) {
        updateParams.extra_body = extraBody;
      }

      await client.vectorStores.update(id, updateParams);

      // Show success state
      setShowSuccessState(true);
      setModalError(
        "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes."
      );
    } catch (err: unknown) {
      console.error("Failed to update vector store:", err);
      const errorMessage =
        err instanceof Error ? err.message : "Failed to update vector store";
      setModalError(errorMessage);
    }
  };

  const handleDeleteVectorStore = async () => {
    if (
      !confirm(
        "Are you sure you want to delete this vector store? This action cannot be undone."
      )
    ) {
      return;
    }

    setIsDeleting(true);

    try {
      await client.vectorStores.delete(id);
      // Redirect to the vector stores list after successful deletion
      router.push("/logs/vector-stores");
    } catch (err: unknown) {
      console.error("Failed to delete vector store:", err);
      const errorMessage = err instanceof Error ? err.message : "Unknown error";
      alert(`Failed to delete vector store: ${errorMessage}`);
    } finally {
      setIsDeleting(false);
    }
  };

  if (errorStore) {
    return <DetailErrorView title={title} id={id} error={errorStore} />;
    return (
      <DetailErrorView
        title="Vector Store Details"
        id={id}
        error={errorStore}
      />
    );
  }
  if (isLoadingStore) {
    return <DetailLoadingView title={title} />;
    return <DetailLoadingView />;
  }
  if (!store) {
    return <DetailNotFoundView title={title} id={id} />;
    return <DetailNotFoundView title="Vector Store Details" id={id} />;
  }

  const mainContent = (
@ -138,6 +242,73 @@ export function VectorStoreDetailView({
  );

  return (
    <DetailLayout title={title} mainContent={mainContent} sidebar={sidebar} />
    <>
      <div className="flex items-center justify-between mb-6">
        <h1 className="text-2xl font-bold">Vector Store Details</h1>
        <div className="flex gap-2">
          <Button
            variant="outline"
            onClick={handleEditVectorStore}
            disabled={isDeleting}
          >
            <Edit2 className="h-4 w-4 mr-2" />
            Edit
          </Button>
          <Button
            variant="destructive"
            onClick={handleDeleteVectorStore}
            disabled={isDeleting}
          >
            {isDeleting ? (
              "Deleting..."
            ) : (
              <>
                <Trash2 className="h-4 w-4 mr-2" />
                Delete
              </>
            )}
          </Button>
        </div>
      </div>
      <div className="flex flex-col md:flex-row gap-6">
        <div className="flex-grow md:w-2/3 space-y-6">{mainContent}</div>
        <div className="md:w-1/3">{sidebar}</div>
      </div>

      {/* Edit Vector Store Modal */}
      {showEditModal && (
        <div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
          <div className="bg-background border rounded-lg shadow-lg max-w-2xl w-full mx-4 max-h-[90vh] overflow-hidden">
            <div className="p-6 border-b flex items-center justify-between">
              <h2 className="text-2xl font-bold">Edit Vector Store</h2>
              <Button
                variant="ghost"
                size="sm"
                onClick={handleCancel}
                className="p-1 h-auto"
              >
                <X className="h-4 w-4" />
              </Button>
            </div>
            <div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
              <VectorStoreEditor
                onSave={handleSaveVectorStore}
                onCancel={handleCancel}
                error={modalError}
                showSuccessState={showSuccessState}
                isEditing={true}
                initialData={{
                  name: store?.name || "",
                  embedding_model: store?.metadata?.embedding_model || "",
                  embedding_dimension:
                    store?.metadata?.embedding_dimension || 768,
                  provider_id: store?.metadata?.provider_id || "",
                }}
              />
            </div>
          </div>
        </div>
      )}
    </>
  );
}
@ -0,0 +1,235 @@
"use client";

import { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Card, CardContent } from "@/components/ui/card";
import {
  Select,
  SelectContent,
  SelectItem,
  SelectTrigger,
  SelectValue,
} from "@/components/ui/select";
import { useAuthClient } from "@/hooks/use-auth-client";
import type { Model } from "llama-stack-client/resources/models";

export interface VectorStoreFormData {
  name: string;
  embedding_model?: string;
  embedding_dimension?: number;
  provider_id?: string;
}

interface VectorStoreEditorProps {
  onSave: (formData: VectorStoreFormData) => Promise<void>;
  onCancel: () => void;
  error?: string | null;
  initialData?: VectorStoreFormData;
  showSuccessState?: boolean;
  isEditing?: boolean;
}

export function VectorStoreEditor({
  onSave,
  onCancel,
  error,
  initialData,
  showSuccessState,
  isEditing = false,
}: VectorStoreEditorProps) {
  const client = useAuthClient();
  const [formData, setFormData] = useState<VectorStoreFormData>(
    initialData || {
      name: "",
      embedding_model: "",
      embedding_dimension: 768,
      provider_id: "",
    }
  );
  const [loading, setLoading] = useState(false);
  const [models, setModels] = useState<Model[]>([]);
  const [modelsLoading, setModelsLoading] = useState(true);
  const [modelsError, setModelsError] = useState<string | null>(null);

  const embeddingModels = models.filter(
    model => model.custom_metadata?.model_type === "embedding"
  );

  useEffect(() => {
    const fetchModels = async () => {
      try {
        setModelsLoading(true);
        setModelsError(null);
        const modelList = await client.models.list();
        setModels(modelList);

        // Set default embedding model if available
        const embeddingModelsList = modelList.filter(model => {
          return model.custom_metadata?.model_type === "embedding";
        });
        if (embeddingModelsList.length > 0 && !formData.embedding_model) {
          setFormData(prev => ({
            ...prev,
            embedding_model: embeddingModelsList[0].id,
          }));
        }
      } catch (err) {
        console.error("Failed to load models:", err);
        setModelsError(
          err instanceof Error ? err.message : "Failed to load models"
        );
      } finally {
        setModelsLoading(false);
      }
    };

    fetchModels();
  }, [client]);

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setLoading(true);

    try {
      await onSave(formData);
    } finally {
      setLoading(false);
    }
  };

  return (
    <Card>
      <CardContent className="pt-6">
        <form onSubmit={handleSubmit} className="space-y-4">
          <div className="space-y-2">
            <Label htmlFor="name">Name</Label>
            <Input
              id="name"
              value={formData.name}
              onChange={e => setFormData({ ...formData, name: e.target.value })}
              placeholder="Enter vector store name"
              required
            />
          </div>

          <div className="space-y-2">
            <Label htmlFor="embedding_model">Embedding Model (Optional)</Label>
            {modelsLoading ? (
              <div className="text-sm text-muted-foreground">
                Loading models... ({models.length} loaded)
              </div>
            ) : modelsError ? (
              <div className="text-sm text-destructive">
                Error: {modelsError}
              </div>
            ) : embeddingModels.length === 0 ? (
              <div className="text-sm text-muted-foreground">
                No embedding models available ({models.length} total models)
              </div>
            ) : (
              <Select
                value={formData.embedding_model}
                onValueChange={value =>
                  setFormData({ ...formData, embedding_model: value })
                }
              >
                <SelectTrigger>
                  <SelectValue placeholder="Select Embedding Model" />
                </SelectTrigger>
                <SelectContent>
                  {embeddingModels.map((model, index) => (
                    <SelectItem key={model.id} value={model.id}>
                      {model.id}
                    </SelectItem>
                  ))}
                </SelectContent>
              </Select>
            )}
            {formData.embedding_model && (
              <p className="text-xs text-muted-foreground mt-1">
                Dimension:{" "}
                {embeddingModels.find(m => m.id === formData.embedding_model)
                  ?.custom_metadata?.embedding_dimension || "Unknown"}
              </p>
            )}
          </div>

          <div className="space-y-2">
            <Label htmlFor="embedding_dimension">
              Embedding Dimension (Optional)
            </Label>
            <Input
              id="embedding_dimension"
              type="number"
              value={formData.embedding_dimension}
              onChange={e =>
                setFormData({
                  ...formData,
                  embedding_dimension: parseInt(e.target.value) || 768,
                })
              }
              placeholder="768"
            />
          </div>

          <div className="space-y-2">
            <Label htmlFor="provider_id">
              Provider ID {isEditing ? "(Read-only)" : "(Optional)"}
            </Label>
            <Input
              id="provider_id"
              value={formData.provider_id}
              onChange={e =>
                setFormData({ ...formData, provider_id: e.target.value })
              }
              placeholder="e.g., faiss, chroma, sqlite"
              disabled={isEditing}
            />
            {isEditing && (
              <p className="text-xs text-muted-foreground">
                Provider ID cannot be changed after vector store creation
              </p>
            )}
          </div>

          {error && (
            <div
              className={`text-sm p-3 rounded ${
                error.startsWith("✅")
                  ? "text-green-700 bg-green-50 border border-green-200"
                  : "text-destructive bg-destructive/10"
              }`}
            >
              {error}
            </div>
          )}

          <div className="flex gap-2 pt-4">
            {showSuccessState ? (
              <Button type="button" onClick={onCancel}>
                Close
              </Button>
            ) : (
              <>
                <Button type="submit" disabled={loading}>
                  {loading
                    ? initialData
                      ? "Updating..."
                      : "Creating..."
                    : initialData
                      ? "Update Vector Store"
                      : "Create Vector Store"}
                </Button>
                <Button type="button" variant="outline" onClick={onCancel}>
                  Cancel
                </Button>
              </>
            )}
          </div>
        </form>
      </CardContent>
    </Card>
  );
}
@ -34,9 +34,35 @@ export class ContentsAPI {

  async getFileContents(
    vectorStoreId: string,
    fileId: string
    fileId: string,
    includeEmbeddings: boolean = true,
    includeMetadata: boolean = true
  ): Promise<VectorStoreContentsResponse> {
    return this.client.vectorStores.files.content(vectorStoreId, fileId);
    try {
      // Use query parameters to pass embeddings and metadata flags (OpenAI-compatible pattern)
      const extraQuery: Record<string, boolean> = {};
      if (includeEmbeddings) {
        extraQuery.include_embeddings = true;
      }
      if (includeMetadata) {
        extraQuery.include_metadata = true;
      }

      const result = await this.client.vectorStores.files.content(
        vectorStoreId,
        fileId,
        {
          query: {
            include_embeddings: includeEmbeddings,
            include_metadata: includeMetadata,
          },
        }
      );
      return result;
    } catch (error) {
      console.error("ContentsAPI.getFileContents error:", error);
      throw error;
    }
  }

  async getContent(
@ -70,11 +96,15 @@ export class ContentsAPI {
      order?: string;
      after?: string;
      before?: string;
      includeEmbeddings?: boolean;
      includeMetadata?: boolean;
    }
  ): Promise<VectorStoreListContentsResponse> {
    const fileContents = await this.client.vectorStores.files.content(
    const fileContents = await this.getFileContents(
      vectorStoreId,
      fileId
      fileId,
      options?.includeEmbeddings ?? true,
      options?.includeMetadata ?? true
    );
    const contentItems: VectorStoreContentItem[] = [];

@ -82,7 +112,7 @@ export class ContentsAPI {
      const rawContent = content as Record<string, unknown>;

      // Extract actual fields from the API response
      const embedding = rawContent.embedding || undefined;
      const embedding = rawContent.embedding as number[] | undefined;
      const created_timestamp =
        rawContent.created_timestamp ||
        rawContent.created_at ||
@ -1,7 +1,13 @@
import type { NextConfig } from "next";

const nextConfig: NextConfig = {
  /* config options here */
  typescript: {
    ignoreBuildErrors: true,
  },
  output: "standalone",
  images: {
    unoptimized: true,
  },
};

export default nextConfig;
16
src/llama_stack_ui/package-lock.json
generated
@ -1,12 +1,13 @@
{
  "name": "ui",
  "name": "llama-stack-ui",
  "version": "0.1.0",
  "version": "0.4.0-alpha.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "ui",
      "name": "llama-stack-ui",
      "version": "0.1.0",
      "version": "0.4.0-alpha.1",
      "license": "MIT",
      "dependencies": {
        "@radix-ui/react-collapsible": "^1.1.12",
        "@radix-ui/react-dialog": "^1.1.15",
@ -20,7 +21,7 @@
        "class-variance-authority": "^0.7.1",
        "clsx": "^2.1.1",
        "framer-motion": "^12.23.24",
        "llama-stack-client": "github:llamastack/llama-stack-client-typescript",
        "llama-stack-client": "^0.3.1",
        "lucide-react": "^0.545.0",
        "next": "15.5.4",
        "next-auth": "^4.24.11",
@ -9684,8 +9685,9 @@
      "license": "MIT"
    },
    "node_modules/llama-stack-client": {
      "version": "0.4.0-alpha.1",
      "version": "0.3.1",
      "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f",
      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz",
      "integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==",
      "license": "MIT",
      "dependencies": {
        "@types/node": "^18.11.18",
@ -1,11 +1,31 @@
{
  "name": "ui",
  "name": "llama-stack-ui",
  "version": "0.1.0",
  "version": "0.4.0-alpha.4",
  "private": true,
  "description": "Web UI for Llama Stack",
  "license": "MIT",
  "author": "Llama Stack <llamastack@meta.com>",
  "repository": {
    "type": "git",
    "url": "https://github.com/llamastack/llama-stack.git",
    "directory": "llama_stack_ui"
  },
  "bin": {
    "llama-stack-ui": "bin/cli.js"
  },
  "files": [
    "bin",
    ".next",
    "public",
    "next.config.ts",
    "instrumentation.ts",
    "tsconfig.json",
    "package.json"
  ],
  "scripts": {
    "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}",
    "build": "next build",
    "build": "next build && node scripts/postbuild.js",
    "start": "next start",
    "prepublishOnly": "npm run build",
    "lint": "next lint",
    "format": "prettier --write \"./**/*.{ts,tsx}\"",
    "format:check": "prettier --check \"./**/*.{ts,tsx}\"",
@ -25,7 +45,7 @@
    "class-variance-authority": "^0.7.1",
    "clsx": "^2.1.1",
    "framer-motion": "^12.23.24",
    "llama-stack-client": "github:llamastack/llama-stack-client-typescript",
    "llama-stack-client": "^0.3.1",
    "lucide-react": "^0.545.0",
    "next": "15.5.4",
    "next-auth": "^4.24.11",
40
src/llama_stack_ui/scripts/postbuild.js
Normal file
@ -0,0 +1,40 @@
const fs = require('fs');
const path = require('path');

// Copy public directory to standalone
const publicSrc = path.join(__dirname, '..', 'public');
const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public');

if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) {
  console.log('Copying public directory to standalone...');
  copyDir(publicSrc, publicDest);
}

// Copy .next/static to standalone
const staticSrc = path.join(__dirname, '..', '.next', 'static');
const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static');

if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) {
  console.log('Copying .next/static to standalone...');
  copyDir(staticSrc, staticDest);
}

function copyDir(src, dest) {
  if (!fs.existsSync(dest)) {
    fs.mkdirSync(dest, { recursive: true });
  }

  const files = fs.readdirSync(src);
  files.forEach((file) => {
    const srcFile = path.join(src, file);
    const destFile = path.join(dest, file);

    if (fs.statSync(srcFile).isDirectory()) {
      copyDir(srcFile, destFile);
    } else {
      fs.copyFileSync(srcFile, destFile);
    }
  });
}

console.log('Postbuild complete!');
@ -1,6 +1,7 @@
{
  "default": [
    {"suite": "base", "setup": "ollama"},
    {"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"},
    {"suite": "vision", "setup": "ollama-vision"},
    {"suite": "responses", "setup": "gpt"},
    {"suite": "base-vllm-subset", "setup": "vllm"}
@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session):
        raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")

    # Handle server:<config_name> format or server:<config_name>:<port>
    # Also handles server:<distro>::<run_file.yaml> format
    if config.startswith("server:"):
        parts = config.split(":")
        config_name = parts[1]
        port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
        # Strip the "server:" prefix first
        config_part = config[7:]  # len("server:") == 7

        # Check for :: (distro::runfile format)
        if "::" in config_part:
            config_name = config_part
            port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
        else:
            # Single colon format: either <name> or <name>:<port>
            parts = config_part.split(":")
            config_name = parts[0]
            port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))

        base_url = f"http://localhost:{port}"

        force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
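For clarity, here is a standalone sketch of the three accepted config forms and how they parse. The helper name and the 8321 default port are illustrative assumptions, not part of the change:

import os

DEFAULT_PORT = 8321  # illustrative default; the real value comes from the test harness

def parse_server_config(config: str) -> tuple[str, int]:
    """Sketch of the rules above: server:<name>, server:<name>:<port>, or server:<distro>::<run_file.yaml>."""
    config_part = config[len("server:"):]
    if "::" in config_part:
        # distro::runfile form: the whole remainder is the config name
        return config_part, int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
    parts = config_part.split(":")
    port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
    return parts[0], port

# parse_server_config("server:ci-tests")                               -> ("ci-tests", 8321)
# parse_server_config("server:ci-tests:8400")                          -> ("ci-tests", 8400)
# parse_server_config("server:ci-tests::run-with-postgres-store.yaml") -> ("ci-tests::run-with-postgres-store.yaml", 8321)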
@ -323,7 +334,13 @@ def require_server(llama_stack_client):
@pytest.fixture(scope="session")
def openai_client(llama_stack_client, require_server):
    base_url = f"{llama_stack_client.base_url}/v1"
    return OpenAI(base_url=base_url, api_key="fake")
    client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0)
    yield client
    # Cleanup: close HTTP connections
    try:
        client.close()
    except Exception:
        pass


@pytest.fixture(params=["openai_client", "client_with_models"])
4
tests/integration/recordings/README.md
generated
@ -2,6 +2,10 @@

This directory contains recorded inference API responses used for deterministic testing without requiring live API access.

For more information, see the
[docs](https://llamastack.github.io/docs/contributing/testing/record-replay).
This README provides more technical information.

## Structure

- `responses/` - JSON files containing request/response pairs for inference operations
@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider):
        client = LlamaStackAsLibraryClient(config, skip_logger_removal=True)
        return client

    return OpenAI(
    client = OpenAI(
        base_url=base_url,
        api_key=api_key,
        max_retries=0,
        timeout=30.0,
    )
    yield client
    # Cleanup: close HTTP connections
    try:
        client.close()
    except Exception:
        pass
@ -65,8 +65,14 @@ class TestConversationResponses:
        conversation_items = openai_client.conversations.items.list(conversation.id)
        assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages

    @pytest.mark.timeout(60, method="thread")
    def test_conversation_context_loading(self, openai_client, text_model_id):
        """Test that conversation context is properly loaded for responses."""
        """Test that conversation context is properly loaded for responses.

        Note: 60s timeout added due to a CI-specific deadlock in pytest/OpenAI client/httpx
        after running 25+ tests. The hang occurs before the first HTTP request is made and
        does not reproduce locally. Investigation needed: connection pool exhaustion or
        event loop state issue.
        """
        conversation = openai_client.conversations.create(
            items=[
                {"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},
@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
            "embedding_model": "ollama/nomic-embed-text:v1.5",
        },
    ),
    "ollama-postgres": Setup(
        name="ollama-postgres",
        description="Server-mode tests with Postgres-backed persistence",
        env={
            "OLLAMA_URL": "http://0.0.0.0:11434",
            "SAFETY_MODEL": "ollama/llama-guard3:1b",
            "POSTGRES_HOST": "127.0.0.1",
            "POSTGRES_PORT": "5432",
            "POSTGRES_DB": "llamastack",
            "POSTGRES_USER": "llamastack",
            "POSTGRES_PASSWORD": "llamastack",
            "LLAMA_STACK_LOGGING": "openai_responses=info",
        },
        defaults={
            "text_model": "ollama/llama3.2:3b-instruct-fp16",
            "embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
            "safety_model": "ollama/llama-guard3:1b",
            "safety_shield": "llama-guard",
        },
    ),
    "vllm": Setup(
        name="vllm",
        description="vLLM provider with a text model",
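As a side note, the POSTGRES_* variables above compose into a standard connection URL. A hypothetical helper for illustration only, not code from this change:

import os

def postgres_url() -> str:
    # Builds a DSN from the environment the "ollama-postgres" setup exports.
    user = os.environ["POSTGRES_USER"]
    password = os.environ["POSTGRES_PASSWORD"]
    host = os.environ["POSTGRES_HOST"]
    port = os.environ["POSTGRES_PORT"]
    db = os.environ["POSTGRES_DB"]
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"

# With the values above: postgresql://llamastack:llamastack@127.0.0.1:5432/llamastack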
@ -11,6 +11,7 @@ import pytest
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError

from llama_stack.apis.files import ExpiresAfter
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata(

    assert "metadata_config_store" in store_names
    assert "consistent_config_store" in store_names


@vector_provider_wrapper
def test_openai_vector_store_file_contents_with_extra_query(
    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
):
    """Test that vector store file contents endpoint supports extra_query parameter."""
    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
    compat_client = compat_client_with_empty_stores

    # Create a vector store
    vector_store = compat_client.vector_stores.create(
        name="test_extra_query_store",
        extra_body={
            "embedding_model": embedding_model_id,
            "provider_id": vector_io_provider_id,
        },
    )

    # Create and attach a file
    test_content = b"This is test content for extra_query validation."
    with BytesIO(test_content) as file_buffer:
        file_buffer.name = "test_extra_query.txt"
        file = compat_client.files.create(
            file=file_buffer,
            purpose="assistants",
            expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
        )

    file_attach_response = compat_client.vector_stores.files.create(
        vector_store_id=vector_store.id,
        file_id=file.id,
        extra_body={"embedding_model": embedding_model_id},
    )
    assert file_attach_response.status == "completed"

    # Wait for processing
    time.sleep(2)

    # Test that extra_query parameter is accepted and processed
    content_with_extra_query = compat_client.vector_stores.files.content(
        vector_store_id=vector_store.id,
        file_id=file.id,
        extra_query={"include_embeddings": True, "include_metadata": True},
    )

    # Test without extra_query for comparison
    content_without_extra_query = compat_client.vector_stores.files.content(
        vector_store_id=vector_store.id,
        file_id=file.id,
    )

    # Validate that both calls succeed
    assert content_with_extra_query is not None
    assert content_without_extra_query is not None
    assert len(content_with_extra_query.data) > 0
    assert len(content_without_extra_query.data) > 0

    # Validate that extra_query parameter is processed correctly
    # Both should have the embedding/metadata fields available (may be None based on flags)
    first_chunk_with_flags = content_with_extra_query.data[0]
    first_chunk_without_flags = content_without_extra_query.data[0]

    # The key validation: extra_query fields are present in the response
    # Handle both dict and object responses (different clients may return different formats)
    def has_field(obj, field):
        if isinstance(obj, dict):
            return field in obj
        else:
            return hasattr(obj, field)

    # Validate that all expected fields are present in both responses
    expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
    for field in expected_fields:
        assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
        assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"

    # Validate content is the same
    def get_field(obj, field):
        if isinstance(obj, dict):
            return obj[field]
        else:
            return getattr(obj, field)

    assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
    assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")

    with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
    without_flags_embedding = get_field(first_chunk_without_flags, "embedding")

    # Validate that embeddings are included when requested and excluded when not requested
    assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
    assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
    assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False"
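For intuition, the extra_query entries above ride along as URL query parameters on the content request. A rough standalone sketch follows; the route shape is an assumption for illustration, not the documented endpoint:

from urllib.parse import urlencode

def file_content_url(base_url: str, vector_store_id: str, file_id: str,
                     include_embeddings: bool = False, include_metadata: bool = False) -> str:
    # Compose the request URL the way an OpenAI-compatible client would,
    # appending the optional boolean flags as query parameters.
    path = f"{base_url}/vector_stores/{vector_store_id}/files/{file_id}/content"
    flags: dict[str, str] = {}
    if include_embeddings:
        flags["include_embeddings"] = "true"
    if include_metadata:
        flags["include_metadata"] = "true"
    return f"{path}?{urlencode(flags)}" if flags else path

# file_content_url("http://localhost:8321/v1", "vs_123", "file_456",
#                  include_embeddings=True, include_metadata=True)
# -> ".../v1/vector_stores/vs_123/files/file_456/content?include_embeddings=true&include_metadata=true"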
@ -55,3 +55,65 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error

    with pytest.raises(ValueError, match="Multiple vector_io providers available"):
        await router.openai_create_vector_store(request)


async def test_update_vector_store_provider_id_change_fails():
    """Test that updating a vector store with a different provider_id fails with clear error."""
    mock_routing_table = Mock()

    # Mock an existing vector store with provider_id "faiss"
    mock_existing_store = Mock()
    mock_existing_store.provider_id = "inline::faiss"
    mock_existing_store.identifier = "vs_123"

    mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
    mock_routing_table.get_provider_impl = AsyncMock(
        return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
    )

    router = VectorIORouter(mock_routing_table)

    # Try to update with different provider_id in metadata - this should fail
    with pytest.raises(ValueError, match="provider_id cannot be changed after vector store creation"):
        await router.openai_update_vector_store(
            vector_store_id="vs_123",
            name="updated_name",
            metadata={"provider_id": "inline::sqlite"},  # Different provider_id
        )

    # Verify the existing store was looked up to check provider_id
    mock_routing_table.get_object_by_identifier.assert_called_once_with("vector_store", "vs_123")

    # Provider should not be called since validation failed
    mock_routing_table.get_provider_impl.assert_not_called()


async def test_update_vector_store_same_provider_id_succeeds():
    """Test that updating a vector store with the same provider_id succeeds."""
    mock_routing_table = Mock()

    # Mock an existing vector store with provider_id "faiss"
    mock_existing_store = Mock()
    mock_existing_store.provider_id = "inline::faiss"
    mock_existing_store.identifier = "vs_123"

    mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
    mock_routing_table.get_provider_impl = AsyncMock(
        return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
    )

    router = VectorIORouter(mock_routing_table)

    # Update with same provider_id should succeed
    await router.openai_update_vector_store(
        vector_store_id="vs_123",
        name="updated_name",
        metadata={"provider_id": "inline::faiss"},  # Same provider_id
    )

    # Verify the provider update method was called
    mock_routing_table.get_provider_impl.assert_called_once_with("vs_123")
    provider = await mock_routing_table.get_provider_impl("vs_123")
    provider.openai_update_vector_store.assert_called_once_with(
        vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"}
    )
@ -104,12 +104,18 @@ async def test_paginated_response_url_setting():

    route_handler = create_dynamic_typed_route(mock_api_method, "get", "/test/route")

    # Mock minimal request
    # Mock minimal request with proper state object
    request = MagicMock()
    request.scope = {"user_attributes": {}, "principal": ""}
    request.headers = {}
    request.body = AsyncMock(return_value=b"")

    # Create a simple state object without auto-generating attributes
    class MockState:
        pass

    request.state = MockState()

    result = await route_handler(request)

    assert isinstance(result, PaginatedResponse)
@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory

import pytest

from llama_stack.providers.utils.sqlstore.api import ColumnType
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

@ -65,6 +65,38 @@ async def test_sqlite_sqlstore():
    assert result.has_more is False


async def test_sqlstore_upsert_support():
    with TemporaryDirectory() as tmp_dir:
        db_path = tmp_dir + "/upsert.db"
        store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path))

        await store.create_table(
            "items",
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "value": ColumnType.STRING,
                "updated_at": ColumnType.INTEGER,
            },
        )

        await store.upsert(
            table="items",
            data={"id": "item_1", "value": "first", "updated_at": 1},
            conflict_columns=["id"],
        )
        row = await store.fetch_one("items", {"id": "item_1"})
        assert row == {"id": "item_1", "value": "first", "updated_at": 1}

        await store.upsert(
            table="items",
            data={"id": "item_1", "value": "second", "updated_at": 2},
            conflict_columns=["id"],
            update_columns=["value", "updated_at"],
        )
        row = await store.fetch_one("items", {"id": "item_1"})
        assert row == {"id": "item_1", "value": "second", "updated_at": 2}


async def test_sqlstore_pagination_basic():
    """Test basic pagination functionality at the SQL store level."""
    with TemporaryDirectory() as tmp_dir:
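Under the hood, an upsert like the one exercised above typically maps onto SQLite's ON CONFLICT clause. One plausible SQLAlchemy rendering, as an illustrative sketch rather than the actual SqlAlchemySqlStoreImpl code:

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
from sqlalchemy.dialects.sqlite import insert

engine = create_engine("sqlite:///:memory:")
metadata = MetaData()
items = Table(
    "items",
    metadata,
    Column("id", String, primary_key=True),
    Column("value", String),
    Column("updated_at", Integer),
)
metadata.create_all(engine)

def upsert(data: dict, conflict_columns: list[str], update_columns: list[str]) -> None:
    stmt = insert(items).values(**data)
    # Renders as: INSERT ... ON CONFLICT (id) DO UPDATE SET value = excluded.value, ...
    stmt = stmt.on_conflict_do_update(
        index_elements=conflict_columns,
        set_={col: stmt.excluded[col] for col in update_columns},
    )
    with engine.begin() as conn:
        conn.execute(stmt)

upsert({"id": "item_1", "value": "first", "updated_at": 1}, ["id"], ["value", "updated_at"])
upsert({"id": "item_1", "value": "second", "updated_at": 2}, ["id"], ["value", "updated_at"])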
12
uv.lock
generated
@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = ">=3.12"
resolution-markers = [
    "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@ -2166,7 +2166,7 @@ test = [
    { name = "milvus-lite", specifier = ">=2.5.0" },
    { name = "psycopg2-binary", specifier = ">=2.9.0" },
    { name = "pymilvus", specifier = ">=2.6.1" },
    { name = "pypdf" },
    { name = "pypdf", specifier = ">=6.1.3" },
    { name = "qdrant-client" },
    { name = "requests" },
    { name = "sqlalchemy" },
@ -2219,7 +2219,7 @@ unit = [
    { name = "moto", extras = ["s3"], specifier = ">=5.1.10" },
    { name = "ollama" },
    { name = "psycopg2-binary", specifier = ">=2.9.0" },
    { name = "pypdf" },
    { name = "pypdf", specifier = ">=6.1.3" },
    { name = "sqlalchemy" },
    { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
    { name = "sqlite-vec" },
@ -3973,11 +3973,11 @@ wheels = [

[[package]]
name = "pypdf"
version = "5.9.0"
version = "6.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/89/3a/584b97a228950ed85aec97c811c68473d9b8d149e6a8c155668287cf1a28/pypdf-5.9.0.tar.gz", hash = "sha256:30f67a614d558e495e1fbb157ba58c1de91ffc1718f5e0dfeb82a029233890a1", size = 5035118, upload-time = "2025-07-27T14:04:52.364Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" }
wheels = [
    { url = "https://files.pythonhosted.org/packages/48/d9/6cff57c80a6963e7dd183bf09e9f21604a77716644b1e580e97b259f7612/pypdf-5.9.0-py3-none-any.whl", hash = "sha256:be10a4c54202f46d9daceaa8788be07aa8cd5ea8c25c529c50dd509206382c35", size = 313193, upload-time = "2025-07-27T14:04:50.53Z" },
    { url = "https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = "sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" },
]

[[package]]