diff --git a/.github/actions/setup-test-environment/action.yml b/.github/actions/setup-test-environment/action.yml index 7b306fef5..1c9d019cc 100644 --- a/.github/actions/setup-test-environment/action.yml +++ b/.github/actions/setup-test-environment/action.yml @@ -39,6 +39,32 @@ runs: if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }} uses: ./.github/actions/setup-vllm + - name: Start Postgres service + if: ${{ contains(inputs.setup, 'postgres') }} + shell: bash + run: | + sudo docker rm -f postgres-ci || true + sudo docker run -d --name postgres-ci \ + -e POSTGRES_USER=llamastack \ + -e POSTGRES_PASSWORD=llamastack \ + -e POSTGRES_DB=llamastack \ + -p 5432:5432 \ + postgres:16 + + echo "Waiting for Postgres to become ready..." + for i in {1..30}; do + if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then + echo "Postgres is ready" + break + fi + if [ "$i" -eq 30 ]; then + echo "Postgres failed to start in time" + sudo docker logs postgres-ci || true + exit 1 + fi + sleep 2 + done + - name: Build Llama Stack shell: bash run: | diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 2c797e906..71c7933b4 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -66,12 +66,12 @@ jobs: run-replay-mode-tests: needs: generate-matrix runs-on: ubuntu-latest - name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} + name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }} strategy: fail-fast: false matrix: - client-type: [library, docker, server] + client: [library, docker, server] # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12 python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }} client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }} @@ -84,6 +84,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Setup test environment + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/setup-test-environment with: python-version: ${{ matrix.python-version }} @@ -93,11 +94,16 @@ jobs: inference-mode: 'replay' - name: Run tests + if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }} uses: ./.github/actions/run-and-record-tests env: OPENAI_API_KEY: dummy with: - stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }} + stack-config: >- + ${{ matrix.config.stack_config + || (matrix.client == 'library' && 'ci-tests') + || (matrix.client == 'server' && 'server:ci-tests') + || 'docker:ci-tests' }} setup: ${{ matrix.config.setup }} inference-mode: 'replay' suite: ${{ matrix.config.suite }} diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml index ab9342c49..c61b53654 100644 --- a/client-sdks/stainless/config.yml +++ b/client-sdks/stainless/config.yml @@ -463,6 +463,12 @@ resources: settings: license: MIT 
unwrap_response_fields: [data] + file_header: | + Copyright (c) Meta Platforms, Inc. and affiliates. + All rights reserved. + + This source code is licensed under the terms described in the LICENSE file in + the root directory of this source tree. openapi: transformations: diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml index 9f3ef15b5..1be4af6c9 100644 --- a/client-sdks/stainless/openapi.yml +++ b/client-sdks/stainless/openapi.yml @@ -2691,7 +2691,8 @@ paths: responses: '200': description: >- - A VectorStoreFileContentResponse representing the file contents. + File contents, optionally with embeddings and metadata based on query + parameters. content: application/json: schema: @@ -2726,6 +2727,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -10091,6 +10106,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. + bool: + type: boolean VectorStoreContent: type: object properties: @@ -10102,6 +10119,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -10125,6 +10162,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index ff50c406a..46ecfa475 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -221,7 +221,15 @@ models: ``` A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage the clients to register models before using them, some Stack servers may come up a list of "already known and available" models. -What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`. +What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier. 
+ +**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`. + +Examples: +- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null` + → Access as: `ollama/llama3.2` +- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b` + → Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier) If you need to conditionally register a model in the configuration, such as only when specific environment variable(s) are set, this can be accomplished by utilizing a special `__disabled__` string as the default value of an environment variable substitution, as shown below: diff --git a/docs/docs/distributions/index.mdx b/docs/docs/distributions/index.mdx index 0149f143f..ebf4bd6ce 100644 --- a/docs/docs/distributions/index.mdx +++ b/docs/docs/distributions/index.mdx @@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack. - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code - **[Configuration Reference](./configuration.mdx)** - Configuration file format details +- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers diff --git a/docs/docs/distributions/llama_stack_ui.mdx b/docs/docs/distributions/llama_stack_ui.mdx new file mode 100644 index 000000000..7ba47ea4d --- /dev/null +++ b/docs/docs/distributions/llama_stack_ui.mdx @@ -0,0 +1,109 @@ +--- +title: Llama Stack UI +description: Web-based user interface for interacting with Llama Stack servers +sidebar_label: Llama Stack UI +sidebar_position: 8 +--- + +# Llama Stack UI + +The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs. + +## Features + +- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity +- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows +- **Prompt Management**: Create and manage reusable prompts + +## Prerequisites + +You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend. + +If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](./starting_llama_stack_server.mdx) guide. + +## Running the UI + +### Option 1: Using npx (Recommended for Quick Start) + +The fastest way to get started is using `npx`: + +```bash +npx llama-stack-ui +``` + +This will start the UI server on `http://localhost:8322` (default port). + +### Option 2: Using Docker + +Run the UI in a container: + +```bash +docker run -p 8322:8322 llamastack/ui +``` + +Access the UI at `http://localhost:8322`.
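+ +Note that when the UI runs inside a container, `localhost` refers to the container itself, so the default backend URL will not reach a Llama Stack server running on the host machine. A minimal sketch of one workaround, assuming Docker's `host.docker.internal` host alias (resolved automatically by Docker Desktop; on Linux it must be mapped in with `--add-host`): + +```bash +# host-gateway requires Docker 20.10+; Docker Desktop resolves host.docker.internal automatically +docker run -p 8322:8322 \ + --add-host=host.docker.internal:host-gateway \ + -e LLAMA_STACK_BACKEND_URL=http://host.docker.internal:8321 \ + llamastack/ui +```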
+ +## Environment Variables + +The UI can be configured using the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` | +| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` | + +If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables: + +| Variable | Description | Default | +|----------|-------------|---------| +| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` | +| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - | +| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - | + +### Setting Environment Variables + +#### For npx: + +```bash +LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ +LLAMA_STACK_UI_PORT=8080 \ +npx llama-stack-ui +``` + +#### For Docker: + +```bash +docker run -p 8080:8080 \ + -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \ + -e LLAMA_STACK_UI_PORT=8080 \ + llamastack/ui +``` + +## Using the UI + +### Managing Resources + +- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings +- **Prompts**: Create and manage reusable prompt templates +- **Chat Completions**: View history of chat interactions +- **Responses**: Browse detailed agent responses and tool calls + +## Development + +If you want to run the UI from source for development: + +```bash +# From the project root +cd src/llama_stack_ui + +# Install dependencies +npm install + +# Set environment variables +export LLAMA_STACK_BACKEND_URL=http://localhost:8321 + +# Start the development server +npm run dev +``` + +The development server will start on `http://localhost:8322` with hot reloading enabled. diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index 623301d0d..2816f67a2 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx @@ -144,7 +144,7 @@ source .venv/bin/activate ```bash uv venv client --python 3.12 source client/bin/activate -pip install llama-stack-client +uv pip install llama-stack-client ``` diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 641c2eed3..7b4ac5ac8 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = { 'distributions/importing_as_library', 'distributions/configuration', 'distributions/starting_llama_stack_server', + 'distributions/llama_stack_ui', { type: 'category', label: 'Self-Hosted Distributions', diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ce8708b68..66eda78c7 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -2688,7 +2688,8 @@ paths: responses: '200': description: >- - A VectorStoreFileContentResponse representing the file contents. + File contents, optionally with embeddings and metadata based on query + parameters. content: application/json: schema: @@ -2723,6 +2724,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. 
+ required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -9375,6 +9390,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. + bool: + type: boolean VectorStoreContent: type: object properties: @@ -9386,6 +9403,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -9409,6 +9446,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml index 9f3ef15b5..1be4af6c9 100644 --- a/docs/static/stainless-llama-stack-spec.yaml +++ b/docs/static/stainless-llama-stack-spec.yaml @@ -2691,7 +2691,8 @@ paths: responses: '200': description: >- - A VectorStoreFileContentResponse representing the file contents. + File contents, optionally with embeddings and metadata based on query + parameters. content: application/json: schema: @@ -2726,6 +2727,20 @@ paths: required: true schema: type: string + - name: include_embeddings + in: query + description: >- + Whether to include embedding vectors in the response. + required: false + schema: + $ref: '#/components/schemas/bool' + - name: include_metadata + in: query + description: >- + Whether to include chunk metadata in the response. + required: false + schema: + $ref: '#/components/schemas/bool' deprecated: false /v1/vector_stores/{vector_store_id}/search: post: @@ -10091,6 +10106,8 @@ components: title: VectorStoreFileDeleteResponse description: >- Response from deleting a vector store file. 
+ bool: + type: boolean VectorStoreContent: type: object properties: @@ -10102,6 +10119,26 @@ components: text: type: string description: The actual text content + embedding: + type: array + items: + type: number + description: >- + Optional embedding vector for this content chunk + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: Optional chunk metadata + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Optional user-defined metadata additionalProperties: false required: - type @@ -10125,6 +10162,7 @@ components: description: Parsed content of the file has_more: type: boolean + default: false description: >- Indicates if there are more content pages to fetch next_page: diff --git a/pyproject.toml b/pyproject.toml index 653c6d613..e6808af8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ unit = [ "aiosqlite", "aiohttp", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "chardet", "sqlalchemy", @@ -135,7 +135,7 @@ test = [ "torchvision>=0.21.0", "chardet", "psycopg2-binary>=2.9.0", - "pypdf", + "pypdf>=6.1.3", "mcp", "datasets>=4.0.0", "autoevals", diff --git a/src/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py index 846c6f191..699241128 100644 --- a/src/llama_stack/apis/vector_io/vector_io.py +++ b/src/llama_stack/apis/vector_io/vector_io.py @@ -10,7 +10,7 @@ # the root directory of this source tree. from typing import Annotated, Any, Literal, Protocol, runtime_checkable -from fastapi import Body +from fastapi import Body, Query from pydantic import BaseModel, Field from llama_stack.apis.common.tracing import telemetry_traceable @@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel): :param type: Content type, currently only "text" is supported :param text: The actual text content + :param embedding: Optional embedding vector for this content chunk + :param chunk_metadata: Optional chunk metadata + :param metadata: Optional user-defined metadata """ type: Literal["text"] text: str + embedding: list[float] | None = None + chunk_metadata: ChunkMetadata | None = None + metadata: dict[str, Any] | None = None @json_schema_type @@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel): deleted: bool = True +@json_schema_type +class VectorStoreFileContentResponse(BaseModel): + """Represents the parsed content of a vector store file. + + :param object: The object type, which is always `vector_store.file_content.page` + :param data: Parsed content of the file + :param has_more: Indicates if there are more content pages to fetch + :param next_page: The token for the next page, if any + """ + + object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" + data: list[VectorStoreContent] + has_more: bool = False + next_page: str | None = None + + @json_schema_type class VectorStoreChunkingStrategyAuto(BaseModel): """Automatic chunking strategy for vector store files. @@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel): has_more: bool = False -@json_schema_type -class VectorStoreFileContentResponse(BaseModel): - """Represents the parsed content of a vector store file. 
- - :param object: The object type, which is always `vector_store.file_content.page` - :param data: Parsed content of the file - :param has_more: Indicates if there are more content pages to fetch - :param next_page: The token for the next page, if any - """ - - object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page" - data: list[VectorStoreContent] - has_more: bool - next_page: str | None = None - - @json_schema_type class VectorStoreFileDeleteResponse(BaseModel): """Response from deleting a vector store file. @@ -732,12 +738,16 @@ class VectorIO(Protocol): self, vector_store_id: str, file_id: str, + include_embeddings: Annotated[bool | None, Query(default=False)] = False, + include_metadata: Annotated[bool | None, Query(default=False)] = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file. :param vector_store_id: The ID of the vector store containing the file to retrieve. :param file_id: The ID of the file to retrieve. - :returns: A VectorStoreFileContentResponse representing the file contents. + :param include_embeddings: Whether to include embedding vectors in the response. + :param include_metadata: Whether to include chunk metadata in the response. + :returns: File contents, optionally with embeddings and metadata based on query parameters. """ ... diff --git a/src/llama_stack/core/build.py b/src/llama_stack/core/build.py index 2ceb9e9be..fb3a22109 100644 --- a/src/llama_stack/core/build.py +++ b/src/llama_stack/core/build.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import importlib.resources import sys from pydantic import BaseModel @@ -12,9 +11,6 @@ from termcolor import cprint from llama_stack.core.datatypes import BuildConfig from llama_stack.core.distribution import get_provider_registry -from llama_stack.core.external import load_external_apis -from llama_stack.core.utils.exec import run_command -from llama_stack.core.utils.image_types import LlamaStackImageType from llama_stack.distributions.template import DistributionTemplate from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api @@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig): for special_dep in special_deps: cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr) print() - - -def build_image( - build_config: BuildConfig, - image_name: str, - distro_or_config: str, - run_config: str | None = None, -): - container_base = build_config.distribution_spec.container_image or "python:3.12-slim" - - normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config) - normal_deps += SERVER_DEPENDENCIES - if build_config.external_apis_dir: - external_apis = load_external_apis(build_config) - if external_apis: - for _, api_spec in external_apis.items(): - normal_deps.extend(api_spec.pip_packages) - - if build_config.image_type == LlamaStackImageType.CONTAINER.value: - script = str(importlib.resources.files("llama_stack") / "core/build_container.sh") - args = [ - script, - "--distro-or-config", - distro_or_config, - "--image-name", - image_name, - "--container-base", - container_base, - "--normal-deps", - " ".join(normal_deps), - ] - # When building from a config file (not a template), include the run config path in the - # build arguments - if run_config is not None: - args.extend(["--run-config", run_config]) - else: - script = str(importlib.resources.files("llama_stack") / 
"core/build_venv.sh") - args = [ - script, - "--env-name", - str(image_name), - "--normal-deps", - " ".join(normal_deps), - ] - - # Always pass both arguments, even if empty, to maintain consistent positional arguments - if special_deps: - args.extend(["--optional-deps", "#".join(special_deps)]) - if external_provider_deps: - args.extend( - ["--external-provider-deps", "#".join(external_provider_deps)] - ) # the script will install external provider module, get its deps, and install those too. - - return_code = run_command(args) - - if return_code != 0: - log.error( - f"Failed to build target {image_name} with return code {return_code}", - ) - - return return_code diff --git a/src/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py index 951de5e9d..f83834522 100644 --- a/src/llama_stack/core/conversations/conversations.py +++ b/src/llama_stack/core/conversations/conversations.py @@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations): "item_data": item_dict, } - # TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update - try: - await self.sql_store.insert(table="conversation_items", data=item_record) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_items", - data={"created_at": created_at, "item_data": item_dict}, - where={"id": item_id}, - ) + await self.sql_store.upsert( + table="conversation_items", + data=item_record, + conflict_columns=["id"], + ) created_items.append(item_dict) diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py index b8f9f715f..db990368b 100644 --- a/src/llama_stack/core/library_client.py +++ b/src/llama_stack/core/library_client.py @@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient): matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls) body |= path_params + # Pass through params that aren't already handled as path params + if options.params: + extra_query_params = {k: v for k, v in options.params.items() if k not in path_params} + if extra_query_params: + body["extra_query"] = extra_query_params + body, field_names = self._handle_file_uploads(options, body) body = self._convert_body(matched_func, body, exclude_params=set(field_names)) diff --git a/src/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py index 9dac461db..ed5fb8253 100644 --- a/src/llama_stack/core/routers/vector_io.py +++ b/src/llama_stack/core/routers/vector_io.py @@ -247,6 +247,13 @@ class VectorIORouter(VectorIO): metadata: dict[str, Any] | None = None, ) -> VectorStoreObject: logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}") + + # Check if provider_id is being changed (not supported) + if metadata and "provider_id" in metadata: + current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id) + if current_store and current_store.provider_id != metadata["provider_id"]: + raise ValueError("provider_id cannot be changed after vector store creation") + provider = await self.routing_table.get_provider_impl(vector_store_id) return await provider.openai_update_vector_store( vector_store_id=vector_store_id, @@ -338,12 +345,19 @@ class VectorIORouter(VectorIO): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> 
VectorStoreFileContentResponse: - logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}") - provider = await self.routing_table.get_provider_impl(vector_store_id) - return await provider.openai_retrieve_vector_store_file_contents( + logger.debug( + f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, " + f"include_embeddings={include_embeddings}, include_metadata={include_metadata}" + ) + + return await self.routing_table.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py index f95a4dbe3..e77739abe 100644 --- a/src/llama_stack/core/routing_tables/vector_stores.py +++ b/src/llama_stack/core/routing_tables/vector_stores.py @@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: await self.assert_action_allowed("read", "vector_store", vector_store_id) + provider = await self.get_provider_impl(vector_store_id) return await provider.openai_retrieve_vector_store_file_contents( vector_store_id=vector_store_id, file_id=file_id, + include_embeddings=include_embeddings, + include_metadata=include_metadata, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py index c06b1b98d..ab102f5f3 100644 --- a/src/llama_stack/distributions/ci-tests/ci_tests.py +++ b/src/llama_stack/distributions/ci-tests/ci_tests.py @@ -13,6 +13,5 @@ from ..starter.starter import get_distribution_template as get_starter_distribut def get_distribution_template() -> DistributionTemplate: template = get_starter_distribution_template(name="ci-tests") template.description = "CI tests for Llama Stack" - template.run_configs.pop("run-with-postgres-store.yaml", None) return template diff --git a/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml new file mode 100644 index 000000000..5384b58fe --- /dev/null +++ b/src/llama_stack/distributions/ci-tests/run-with-postgres-store.yaml @@ -0,0 +1,293 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- batches +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ${env.CEREBRAS_API_KEY:+cerebras} + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY:=} + - provider_id: ${env.OLLAMA_URL:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.VLLM_URL:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.TGI_URL:+tgi} + provider_type: remote::tgi + config: + url: ${env.TGI_URL:=} + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY:=} + - provider_id: together + 
provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: bedrock + provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} + - provider_id: ${env.NVIDIA_API_KEY:+nvidia} + provider_type: remote::nvidia + config: + url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} + api_key: ${env.NVIDIA_API_KEY:=} + append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} + - provider_id: anthropic + provider_type: remote::anthropic + config: + api_key: ${env.ANTHROPIC_API_KEY:=} + - provider_id: gemini + provider_type: remote::gemini + config: + api_key: ${env.GEMINI_API_KEY:=} + - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY:=} + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY:=} + - provider_id: ${env.AZURE_API_KEY:+azure} + provider_type: remote::azure + config: + api_key: ${env.AZURE_API_KEY:=} + api_base: ${env.AZURE_API_BASE:=} + api_version: ${env.AZURE_API_VERSION:=} + api_type: ${env.AZURE_API_TYPE:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_default + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + persistence: + namespace: vector_io::sqlite_vec + backend: kv_default + - provider_id: ${env.MILVUS_URL:+milvus} + provider_type: inline::milvus + config: + db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db + persistence: + namespace: vector_io::milvus + backend: kv_default + - provider_id: ${env.CHROMADB_URL:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + persistence: + namespace: vector_io::chroma_remote + backend: kv_default + - provider_id: ${env.PGVECTOR_DB:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + persistence: + namespace: vector_io::pgvector + backend: kv_default + - provider_id: ${env.QDRANT_URL:+qdrant} + provider_type: remote::qdrant + config: + api_key: ${env.QDRANT_API_KEY:=} + persistence: + namespace: vector_io::qdrant_remote + backend: kv_default + - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate} + provider_type: remote::weaviate + config: + weaviate_api_key: null + weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} + persistence: + namespace: vector_io::weaviate + backend: kv_default + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files} + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: llama-guard + provider_type: 
inline::llama-guard + config: + excluded_categories: [] + - provider_id: code-scanner + provider_type: inline::code-scanner + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + post_training: + - provider_id: torchtune-cpu + provider_type: inline::torchtune-cpu + config: + checkpoint_format: meta + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + namespace: eval + backend: kv_default + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + namespace: datasetio::huggingface + backend: kv_default + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + namespace: datasetio::localfs + backend: kv_default + scoring: + - provider_id: basic + provider_type: inline::basic + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + batches: + - provider_id: reference + provider_type: inline::reference + config: + kvstore: + namespace: batches + backend: kv_default +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default + prompts: + namespace: prompts + backend: kv_default +registered_resources: + models: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +telemetry: + enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml 
index 1920ebd9d..e29ada6f4 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: huggingface-gpu provider_type: inline::huggingface-gpu @@ -237,10 +232,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -248,7 +243,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -258,27 +253,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 702f95381..437674bf9 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -165,20 +165,15 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - responses_store: - type: sql_postgres - host: ${env.POSTGRES_HOST:=localhost} - port: 
${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 post_training: - provider_id: torchtune-cpu provider_type: inline::torchtune-cpu @@ -234,10 +229,10 @@ providers: config: kvstore: namespace: batches - backend: kv_postgres + backend: kv_default storage: backends: - kv_postgres: + kv_default: type: kv_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -245,7 +240,7 @@ storage: user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} - sql_postgres: + sql_default: type: sql_postgres host: ${env.POSTGRES_HOST:=localhost} port: ${env.POSTGRES_PORT:=5432} @@ -255,27 +250,44 @@ storage: stores: metadata: namespace: registry - backend: kv_postgres + backend: kv_default inference: table_name: inference_store - backend: sql_postgres + backend: sql_default max_write_queue_size: 10000 num_writers: 4 conversations: table_name: openai_conversations - backend: sql_postgres + backend: sql_default prompts: namespace: prompts - backend: kv_postgres + backend: kv_default registered_resources: models: [] - shields: [] + shields: + - shield_id: llama-guard + provider_id: ${env.SAFETY_MODEL:+llama-guard} + provider_shield_id: ${env.SAFETY_MODEL:=} + - shield_id: code-scanner + provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner} + provider_shield_id: ${env.CODE_SCANNER_MODEL:=} vector_dbs: [] datasets: [] scoring_fns: [] benchmarks: [] - tool_groups: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime server: port: 8321 telemetry: enabled: true +vector_stores: + default_provider_id: faiss + default_embedding_model: + provider_id: sentence-transformers + model_id: nomic-ai/nomic-embed-text-v1.5 +safety: + default_shield_id: llama-guard diff --git a/src/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py index 4b23f0728..c31ee0296 100644 --- a/src/llama_stack/distributions/starter/starter.py +++ b/src/llama_stack/distributions/starter/starter.py @@ -17,11 +17,6 @@ from llama_stack.core.datatypes import ( ToolGroupInput, VectorStoresConfig, ) -from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, - KVStoreReference, - SqlStoreReference, -) from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings from llama_stack.providers.datatypes import RemoteProviderSpec @@ -157,10 +152,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: BuildProvider(provider_type="inline::reference"), ], } + files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}") files_provider = Provider( provider_id="meta-reference-files", provider_type="inline::localfs", - config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"), + config=files_config, ) embedding_provider = Provider( provider_id="sentence-transformers", @@ -190,7 +186,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: provider_shield_id="${env.CODE_SCANNER_MODEL:=}", ), ] - postgres_config = 
PostgresSqlStoreConfig.sample_run_config() + postgres_sql_config = PostgresSqlStoreConfig.sample_run_config() + postgres_kv_config = PostgresKVStoreConfig.sample_run_config() default_overrides = { "inference": remote_inference_providers + [embedding_provider], "vector_io": [ @@ -247,6 +244,33 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: "files": [files_provider], } + base_run_settings = RunConfigSettings( + provider_overrides=default_overrides, + default_models=[], + default_tool_groups=default_tool_groups, + default_shields=default_shields, + vector_stores_config=VectorStoresConfig( + default_provider_id="faiss", + default_embedding_model=QualifiedModel( + provider_id="sentence-transformers", + model_id="nomic-ai/nomic-embed-text-v1.5", + ), + ), + safety_config=SafetyConfig( + default_shield_id="llama-guard", + ), + ) + + postgres_run_settings = base_run_settings.model_copy( + update={ + "storage_backends": { + "kv_default": postgres_kv_config, + "sql_default": postgres_sql_config, + } + }, + deep=True, + ) + return DistributionTemplate( name=name, distro_type="self_hosted", @@ -256,71 +280,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate: providers=providers, additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())), run_configs={ - "run.yaml": RunConfigSettings( - provider_overrides=default_overrides, - default_models=[], - default_tool_groups=default_tool_groups, - default_shields=default_shields, - vector_stores_config=VectorStoresConfig( - default_provider_id="faiss", - default_embedding_model=QualifiedModel( - provider_id="sentence-transformers", - model_id="nomic-ai/nomic-embed-text-v1.5", - ), - ), - safety_config=SafetyConfig( - default_shield_id="llama-guard", - ), - ), - "run-with-postgres-store.yaml": RunConfigSettings( - provider_overrides={ - **default_overrides, - "agents": [ - Provider( - provider_id="meta-reference", - provider_type="inline::meta-reference", - config=dict( - persistence_store=postgres_config, - responses_store=postgres_config, - ), - ) - ], - "batches": [ - Provider( - provider_id="reference", - provider_type="inline::reference", - config=dict( - kvstore=KVStoreReference( - backend="kv_postgres", - namespace="batches", - ).model_dump(exclude_none=True), - ), - ) - ], - }, - storage_backends={ - "kv_postgres": PostgresKVStoreConfig.sample_run_config(), - "sql_postgres": postgres_config, - }, - storage_stores={ - "metadata": KVStoreReference( - backend="kv_postgres", - namespace="registry", - ).model_dump(exclude_none=True), - "inference": InferenceStoreReference( - backend="sql_postgres", - table_name="inference_store", - ).model_dump(exclude_none=True), - "conversations": SqlStoreReference( - backend="sql_postgres", - table_name="openai_conversations", - ).model_dump(exclude_none=True), - "prompts": KVStoreReference( - backend="kv_postgres", - namespace="prompts", - ).model_dump(exclude_none=True), - }, - ), + "run.yaml": base_run_settings, + "run-with-postgres-store.yaml": postgres_run_settings, }, run_config_env_vars={ "LLAMA_STACK_PORT": ( diff --git a/src/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py index 2bf947a8d..a3a28aec0 100644 --- a/src/llama_stack/providers/utils/inference/inference_store.py +++ b/src/llama_stack/providers/utils/inference/inference_store.py @@ -66,14 +66,6 @@ class InferenceStore: }, ) - if self.enable_write_queue: - self._queue = 
asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: if not self._worker_tasks: return @@ -94,10 +86,29 @@ class InferenceStore: if self.enable_write_queue and self._queue is not None: await self._queue.join() + async def _ensure_workers_started(self) -> None: + """Ensure the async write queue workers run on the current loop.""" + if not self.enable_write_queue: + return + + if self._queue is None: + self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) + logger.debug( + f"Inference store write queue created with max size {self._max_write_queue_size} " + f"and {self._num_writers} writers" + ) + + if not self._worker_tasks: + loop = asyncio.get_running_loop() + for _ in range(self._num_writers): + task = loop.create_task(self._worker_loop()) + self._worker_tasks.append(task) + async def store_chat_completion( self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam] ) -> None: if self.enable_write_queue: + await self._ensure_workers_started() if self._queue is None: raise ValueError("Inference store is not initialized") try: diff --git a/src/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/providers/utils/kvstore/kvstore.py index eee51e5d9..5b8d77102 100644 --- a/src/llama_stack/providers/utils/kvstore/kvstore.py +++ b/src/llama_stack/providers/utils/kvstore/kvstore.py @@ -11,6 +11,9 @@ from __future__ import annotations +import asyncio +from collections import defaultdict + from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType from .api import KVStore @@ -53,45 +56,63 @@ class InmemoryKVStoreImpl(KVStore): _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {} +_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {} +_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock) def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available KV store backends for reference resolution.""" global _KVSTORE_BACKENDS + global _KVSTORE_INSTANCES + global _KVSTORE_LOCKS _KVSTORE_BACKENDS.clear() + _KVSTORE_INSTANCES.clear() + _KVSTORE_LOCKS.clear() for name, cfg in backends.items(): _KVSTORE_BACKENDS[name] = cfg async def kvstore_impl(reference: KVStoreReference) -> KVStore: backend_name = reference.backend + cache_key = (backend_name, reference.namespace) + + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing backend_config = _KVSTORE_BACKENDS.get(backend_name) if backend_config is None: raise ValueError(f"Unknown KVStore backend '{backend_name}'. 
Registered backends: {sorted(_KVSTORE_BACKENDS)}") - config = backend_config.model_copy() - config.namespace = reference.namespace + lock = _KVSTORE_LOCKS[cache_key] + async with lock: + existing = _KVSTORE_INSTANCES.get(cache_key) + if existing: + return existing - if config.type == StorageBackendType.KV_REDIS.value: - from .redis import RedisKVStoreImpl + config = backend_config.model_copy() + config.namespace = reference.namespace - impl = RedisKVStoreImpl(config) - elif config.type == StorageBackendType.KV_SQLITE.value: - from .sqlite import SqliteKVStoreImpl + if config.type == StorageBackendType.KV_REDIS.value: + from .redis import RedisKVStoreImpl - impl = SqliteKVStoreImpl(config) - elif config.type == StorageBackendType.KV_POSTGRES.value: - from .postgres import PostgresKVStoreImpl + impl = RedisKVStoreImpl(config) + elif config.type == StorageBackendType.KV_SQLITE.value: + from .sqlite import SqliteKVStoreImpl - impl = PostgresKVStoreImpl(config) - elif config.type == StorageBackendType.KV_MONGODB.value: - from .mongodb import MongoDBKVStoreImpl + impl = SqliteKVStoreImpl(config) + elif config.type == StorageBackendType.KV_POSTGRES.value: + from .postgres import PostgresKVStoreImpl - impl = MongoDBKVStoreImpl(config) - else: - raise ValueError(f"Unknown kvstore type {config.type}") + impl = PostgresKVStoreImpl(config) + elif config.type == StorageBackendType.KV_MONGODB.value: + from .mongodb import MongoDBKVStoreImpl - await impl.initialize() - return impl + impl = MongoDBKVStoreImpl(config) + else: + raise ValueError(f"Unknown kvstore type {config.type}") + + await impl.initialize() + _KVSTORE_INSTANCES[cache_key] = impl + return impl diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index 86e6ea013..853245598 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC): # Unknown filter type, default to no match raise ValueError(f"Unsupported filter type: {filter_type}") - def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]: - # content is InterleavedContent + def _chunk_to_vector_store_content( + self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False + ) -> list[VectorStoreContent]: + def extract_fields() -> dict: + """Extract embedding and metadata fields from chunk based on include flags.""" + return { + "embedding": chunk.embedding if include_embeddings else None, + "chunk_metadata": chunk.chunk_metadata if include_metadata else None, + "metadata": chunk.metadata if include_metadata else None, + } + + fields = extract_fields() + if isinstance(chunk.content, str): - content = [ - VectorStoreContent( - type="text", - text=chunk.content, - ) - ] + content_item = VectorStoreContent(type="text", text=chunk.content, **fields) + content = [content_item] elif isinstance(chunk.content, list): # TODO: Add support for other types of content - content = [ - VectorStoreContent( - type="text", - text=item.text, - ) - for item in chunk.content - if item.type == "text" - ] + content = [] + for item in chunk.content: + if item.type == "text": + content_item = VectorStoreContent(type="text", text=item.text, **fields) + content.append(content_item) else: if chunk.content.type != "text": raise ValueError(f"Unsupported content type: {chunk.content.type}") - content = [ - 
VectorStoreContent( - type="text", - text=chunk.content.text, - ) - ] + + content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields) + content = [content_item] return content async def openai_attach_file_to_vector_store( @@ -820,13 +821,12 @@ class OpenAIVectorStoreMixin(ABC): message=str(e), ) - # Create OpenAI vector store file metadata + # Save vector store file to persistent storage AFTER insert_chunks + # so that chunks include the embeddings that were generated file_info = vector_store_file_object.model_dump(exclude={"last_error"}) file_info["filename"] = file_response.filename if file_response else "" - # Save vector store file to persistent storage (provider-specific) dict_chunks = [c.model_dump() for c in chunks] - # This should be updated to include chunk_id await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks) # Update file_ids and file_counts in vector store metadata @@ -921,21 +921,27 @@ class OpenAIVectorStoreMixin(ABC): self, vector_store_id: str, file_id: str, + include_embeddings: bool | None = False, + include_metadata: bool | None = False, ) -> VectorStoreFileContentResponse: """Retrieves the contents of a vector store file.""" if vector_store_id not in self.openai_vector_stores: raise VectorStoreNotFoundError(vector_store_id) + # Parameters are already provided directly + # include_embeddings and include_metadata are now function parameters + dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id) chunks = [Chunk.model_validate(c) for c in dict_chunks] content = [] for chunk in chunks: - content.extend(self._chunk_to_vector_store_content(chunk)) + content.extend( + self._chunk_to_vector_store_content( + chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False + ) + ) return VectorStoreFileContentResponse( - object="vector_store.file_content.page", data=content, - has_more=False, - next_page=None, ) async def openai_update_vector_store_file( diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index 40466d00c..fdca8ddee 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -3,8 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import asyncio -from typing import Any from llama_stack.apis.agents import ( Order, @@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import ( ) from llama_stack.apis.inference import OpenAIMessageParam from llama_stack.core.datatypes import AccessRule -from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType +from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference from llama_stack.log import get_logger from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore -from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl +from ..sqlstore.sqlstore import sqlstore_impl logger = get_logger(name=__name__, category="openai_responses") @@ -55,28 +53,12 @@ class ResponsesStore: self.policy = policy self.sql_store = None - self.enable_write_queue = True - - # Async write queue and worker control - self._queue: ( - asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None - ) = None - self._worker_tasks: list[asyncio.Task[Any]] = [] - self._max_write_queue_size: int = self.reference.max_write_queue_size - self._num_writers: int = max(1, self.reference.num_writers) async def initialize(self): """Create the necessary tables if they don't exist.""" base_store = sqlstore_impl(self.reference) self.sql_store = AuthorizedSqlStore(base_store, self.policy) - # Disable write queue for SQLite since WAL mode handles concurrency - # Keep it enabled for other backends (like Postgres) for performance - backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend) - if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE: - self.enable_write_queue = False - logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)") - await self.sql_store.create_table( "openai_responses", { @@ -95,33 +77,12 @@ class ResponsesStore: }, ) - if self.enable_write_queue: - self._queue = asyncio.Queue(maxsize=self._max_write_queue_size) - for _ in range(self._num_writers): - self._worker_tasks.append(asyncio.create_task(self._worker_loop())) - logger.debug( - f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}" - ) - async def shutdown(self) -> None: - if not self._worker_tasks: - return - if self._queue is not None: - await self._queue.join() - for t in self._worker_tasks: - if not t.done(): - t.cancel() - for t in self._worker_tasks: - try: - await t - except asyncio.CancelledError: - pass - self._worker_tasks.clear() + return async def flush(self) -> None: - """Wait for all queued writes to complete. 
Useful for testing.""" - if self.enable_write_queue and self._queue is not None: - await self._queue.join() + """Maintained for compatibility; no-op now that writes are synchronous.""" + return async def store_response_object( self, @@ -129,31 +90,7 @@ class ResponsesStore: input: list[OpenAIResponseInput], messages: list[OpenAIMessageParam], ) -> None: - if self.enable_write_queue: - if self._queue is None: - raise ValueError("Responses store is not initialized") - try: - self._queue.put_nowait((response_object, input, messages)) - except asyncio.QueueFull: - logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '')}") - await self._queue.put((response_object, input, messages)) - else: - await self._write_response_object(response_object, input, messages) - - async def _worker_loop(self) -> None: - assert self._queue is not None - while True: - try: - item = await self._queue.get() - except asyncio.CancelledError: - break - response_object, input, messages = item - try: - await self._write_response_object(response_object, input, messages) - except Exception as e: # noqa: BLE001 - logger.error(f"Error writing response object: {e}") - finally: - self._queue.task_done() + await self._write_response_object(response_object, input, messages) async def _write_response_object( self, @@ -315,19 +252,12 @@ class ResponsesStore: # Serialize messages to dict format for JSON storage messages_data = [msg.model_dump() for msg in messages] - # Upsert: try insert first, update if exists - try: - await self.sql_store.insert( - table="conversation_messages", - data={"conversation_id": conversation_id, "messages": messages_data}, - ) - except Exception: - # If insert fails due to ID conflict, update existing record - await self.sql_store.update( - table="conversation_messages", - data={"messages": messages_data}, - where={"conversation_id": conversation_id}, - ) + await self.sql_store.upsert( + table="conversation_messages", + data={"conversation_id": conversation_id, "messages": messages_data}, + conflict_columns=["conversation_id"], + update_columns=["messages"], + ) logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}") diff --git a/src/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py index a61fd1090..bcd224234 100644 --- a/src/llama_stack/providers/utils/sqlstore/api.py +++ b/src/llama_stack/providers/utils/sqlstore/api.py @@ -47,6 +47,18 @@ class SqlStore(Protocol): """ pass + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + """ + Insert a row and update specified columns when conflicts occur. 
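+
+        If a row matching ``conflict_columns`` already exists, only the columns in
+        ``update_columns`` are overwritten; when ``update_columns`` is None, every
+        non-conflict column from ``data`` is updated.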
+        """
+        pass
+
     async def fetch_all(
         self,
         table: str,
diff --git a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
index 3dfc82677..ba95dd120 100644
--- a/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
+++ b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
@@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use
         enhanced["owner_principal"] = current_user.principal
         enhanced["access_attributes"] = current_user.attributes
     else:
-        enhanced["owner_principal"] = None
-        enhanced["access_attributes"] = None
+        # IMPORTANT: owner_principal must be the empty string (not None) so that rows
+        # match the public access filter in _get_public_access_conditions(), which
+        # expects owner_principal = '' and access_attributes = JSON null.
+        # Storing owner_principal as SQL NULL would cause these rows to be
+        # filtered out on read.
+        enhanced["owner_principal"] = ""
+        enhanced["access_attributes"] = None  # Pydantic/JSON will serialize this as JSON null
     return enhanced
 
 
@@ -124,6 +129,23 @@ class AuthorizedSqlStore:
         enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data]
         await self.sql_store.insert(table, enhanced_data)
 
+    async def upsert(
+        self,
+        table: str,
+        data: Mapping[str, Any],
+        conflict_columns: list[str],
+        update_columns: list[str] | None = None,
+    ) -> None:
+        """Upsert a row with automatic access control attribute capture."""
+        current_user = get_authenticated_user()
+        enhanced_data = _enhance_item_with_access_control(data, current_user)
+        await self.sql_store.upsert(
+            table=table,
+            data=enhanced_data,
+            conflict_columns=conflict_columns,
+            update_columns=update_columns,
+        )
+
     async def fetch_all(
         self,
         table: str,
@@ -188,8 +210,9 @@ class AuthorizedSqlStore:
             enhanced_data["owner_principal"] = current_user.principal
             enhanced_data["access_attributes"] = current_user.attributes
         else:
-            enhanced_data["owner_principal"] = None
-            enhanced_data["access_attributes"] = None
+            # IMPORTANT: Use empty string for owner_principal to match public access filter
+            enhanced_data["owner_principal"] = ""
+            enhanced_data["access_attributes"] = None  # Will serialize as JSON null
 
         await self.sql_store.update(table, enhanced_data, where)
 
@@ -245,14 +268,24 @@ class AuthorizedSqlStore:
             raise ValueError(f"Unsupported database type: {self.database_type}")
 
     def _get_public_access_conditions(self) -> list[str]:
-        """Get the SQL conditions for public access."""
-        # Public records are records that have no owner_principal or access_attributes
+        """Get the SQL conditions for public access.
+ + Public records are those with: + - owner_principal = '' (empty string) + - access_attributes is either SQL NULL or JSON null + + Note: Different databases serialize None differently: + - SQLite: None → JSON null (text = 'null') + - Postgres: None → SQL NULL (IS NULL) + """ conditions = ["owner_principal = ''"] if self.database_type == StorageBackendType.SQL_POSTGRES.value: - # Postgres stores JSON null as 'null' - conditions.append("access_attributes::text = 'null'") + # Accept both SQL NULL and JSON null for Postgres compatibility + # This handles both old rows (SQL NULL) and new rows (JSON null) + conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')") elif self.database_type == StorageBackendType.SQL_SQLITE.value: - conditions.append("access_attributes = 'null'") + # SQLite serializes None as JSON null + conditions.append("(access_attributes IS NULL OR access_attributes = 'null')") else: raise ValueError(f"Unsupported database type: {self.database_type}") return conditions diff --git a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py index 356f49ed1..cfc3131f4 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py @@ -72,13 +72,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement: class SqlAlchemySqlStoreImpl(SqlStore): def __init__(self, config: SqlAlchemySqlStoreConfig): self.config = config + self._is_sqlite_backend = "sqlite" in self.config.engine_str self.async_session = async_sessionmaker(self.create_engine()) self.metadata = MetaData() def create_engine(self) -> AsyncEngine: # Configure connection args for better concurrency support connect_args = {} - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: # SQLite-specific optimizations for concurrent access # With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases connect_args["timeout"] = 5.0 @@ -91,7 +92,7 @@ class SqlAlchemySqlStoreImpl(SqlStore): ) # Enable WAL mode for SQLite to support concurrent readers and writers - if "sqlite" in self.config.engine_str: + if self._is_sqlite_backend: @event.listens_for(engine.sync_engine, "connect") def set_sqlite_pragma(dbapi_conn, connection_record): @@ -151,6 +152,29 @@ class SqlAlchemySqlStoreImpl(SqlStore): await session.execute(self.metadata.tables[table].insert(), data) await session.commit() + async def upsert( + self, + table: str, + data: Mapping[str, Any], + conflict_columns: list[str], + update_columns: list[str] | None = None, + ) -> None: + table_obj = self.metadata.tables[table] + dialect_insert = self._get_dialect_insert(table_obj) + insert_stmt = dialect_insert.values(**data) + + if update_columns is None: + update_columns = [col for col in data.keys() if col not in conflict_columns] + + update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns} + conflict_cols = [table_obj.c[col] for col in conflict_columns] + + stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping) + + async with self.async_session() as session: + await session.execute(stmt) + await session.commit() + async def fetch_all( self, table: str, @@ -333,9 +357,18 @@ class SqlAlchemySqlStoreImpl(SqlStore): add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}") await conn.execute(add_column_sql) - except Exception as e: # If any 
error occurs during migration, log it but don't fail # The table creation will handle adding the column logger.error(f"Error adding column {column_name} to table {table}: {e}") pass + + def _get_dialect_insert(self, table: Table): + if self._is_sqlite_backend: + from sqlalchemy.dialects.sqlite import insert as sqlite_insert + + return sqlite_insert(table) + else: + from sqlalchemy.dialects.postgresql import insert as pg_insert + + return pg_insert(table) diff --git a/src/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlstore.py index 31801c4ca..9409b7d00 100644 --- a/src/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/src/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from threading import Lock from typing import Annotated, cast from pydantic import Field @@ -21,6 +22,8 @@ from .api import SqlStore sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"] _SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {} +_SQLSTORE_INSTANCES: dict[str, SqlStore] = {} +_SQLSTORE_LOCKS: dict[str, Lock] = {} SqlStoreConfig = Annotated[ @@ -52,19 +55,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore: f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}" ) - if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): - from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing - config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() - return SqlAlchemySqlStoreImpl(config) - else: - raise ValueError(f"Unknown sqlstore type {backend_config.type}") + lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock()) + with lock: + existing = _SQLSTORE_INSTANCES.get(backend_name) + if existing: + return existing + + if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig): + from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl + + config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy() + instance = SqlAlchemySqlStoreImpl(config) + _SQLSTORE_INSTANCES[backend_name] = instance + return instance + else: + raise ValueError(f"Unknown sqlstore type {backend_config.type}") def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None: """Register the set of available SQL store backends for reference resolution.""" global _SQLSTORE_BACKENDS + global _SQLSTORE_INSTANCES _SQLSTORE_BACKENDS.clear() + _SQLSTORE_INSTANCES.clear() + _SQLSTORE_LOCKS.clear() for name, cfg in backends.items(): _SQLSTORE_BACKENDS[name] = cfg diff --git a/src/llama_stack_ui/.dockerignore b/src/llama_stack_ui/.dockerignore new file mode 100644 index 000000000..e3d1daae6 --- /dev/null +++ b/src/llama_stack_ui/.dockerignore @@ -0,0 +1,20 @@ +.git +.gitignore +.env.local +.env.*.local +.next +node_modules +npm-debug.log +*.md +.DS_Store +.vscode +.idea +playwright-report +e2e +jest.config.ts +jest.setup.ts +eslint.config.mjs +.prettierrc +.prettierignore +.nvmrc +playwright.config.ts diff --git a/src/llama_stack_ui/Containerfile b/src/llama_stack_ui/Containerfile new file mode 100644 index 000000000..6aea3dbfd --- /dev/null +++ b/src/llama_stack_ui/Containerfile @@ -0,0 +1,18 @@ +FROM node:22.5.1-alpine + +ENV NODE_ENV=production + +# Install dumb-init for proper signal handling +RUN apk 
add --no-cache dumb-init + +# Create non-root user for security +RUN addgroup --system --gid 1001 nodejs +RUN adduser --system --uid 1001 nextjs + +# Install llama-stack-ui from npm +RUN npm install -g llama-stack-ui + +USER nextjs + +ENTRYPOINT ["dumb-init", "--"] +CMD ["llama-stack-ui"] diff --git a/src/llama_stack_ui/app/logs/vector-stores/page.tsx b/src/llama_stack_ui/app/logs/vector-stores/page.tsx index 72196d496..84680e01a 100644 --- a/src/llama_stack_ui/app/logs/vector-stores/page.tsx +++ b/src/llama_stack_ui/app/logs/vector-stores/page.tsx @@ -8,6 +8,9 @@ import type { import { useRouter } from "next/navigation"; import { usePagination } from "@/hooks/use-pagination"; import { Button } from "@/components/ui/button"; +import { Plus, Trash2, Search, Edit, X } from "lucide-react"; +import { useState } from "react"; +import { Input } from "@/components/ui/input"; import { Table, TableBody, @@ -17,9 +20,21 @@ import { TableRow, } from "@/components/ui/table"; import { Skeleton } from "@/components/ui/skeleton"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { + VectorStoreEditor, + VectorStoreFormData, +} from "@/components/vector-stores/vector-store-editor"; export default function VectorStoresPage() { const router = useRouter(); + const client = useAuthClient(); + const [deletingStores, setDeletingStores] = useState>(new Set()); + const [searchTerm, setSearchTerm] = useState(""); + const [showVectorStoreModal, setShowVectorStoreModal] = useState(false); + const [editingStore, setEditingStore] = useState(null); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); const { data: stores, status, @@ -47,6 +62,142 @@ export default function VectorStoresPage() { } }, [status, hasMore, loadMore]); + // Handle ESC key to close modal + React.useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showVectorStoreModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showVectorStoreModal]); + + const handleDeleteVectorStore = async (storeId: string) => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setDeletingStores(prev => new Set([...prev, storeId])); + + try { + await client.vectorStores.delete(storeId); + // Reload the data to reflect the deletion + window.location.reload(); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? 
err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setDeletingStores(prev => { + const newSet = new Set(prev); + newSet.delete(storeId); + return newSet; + }); + } + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + if (editingStore) { + // Update existing vector store + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== editingStore.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(editingStore.id, updateParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + return; + } + + const createParams: { + name?: string; + provider_id?: string; + extra_body?: Record; + } = { + name: formData.name || undefined, + }; + + // Extract provider_id to top-level (like Python client does) + if (formData.provider_id) { + createParams.provider_id = formData.provider_id; + } + + // Add remaining parameters to extra_body + const extraBody: Record = {}; + if (formData.provider_id) { + extraBody.provider_id = formData.provider_id; + } + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + createParams.extra_body = extraBody; + } + + await client.vectorStores.create(createParams); + + // Show success state with close button + setShowSuccessState(true); + setModalError( + "✅ Vector store created successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to create vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to create vector store"; + setModalError(errorMessage); + } + }; + + const handleEditVectorStore = (store: VectorStore) => { + setEditingStore(store); + setShowVectorStoreModal(true); + setModalError(null); + }; + + const handleCancel = () => { + setShowVectorStoreModal(false); + setEditingStore(null); + setModalError(null); + setShowSuccessState(false); + }; + const renderContent = () => { if (status === "loading") { return ( @@ -66,73 +217,190 @@ export default function VectorStoresPage() { return

No vector stores found.

; } - return ( -
- - - - ID - Name - Created - Completed - Cancelled - Failed - In Progress - Total - Usage Bytes - Provider ID - Provider Vector DB ID - - - - {stores.map(store => { - const fileCounts = store.file_counts; - const metadata = store.metadata || {}; - const providerId = metadata.provider_id ?? ""; - const providerDbId = metadata.provider_vector_db_id ?? ""; + // Filter stores based on search term + const filteredStores = stores.filter(store => { + if (!searchTerm) return true; - return ( - router.push(`/logs/vector-stores/${store.id}`)} - className="cursor-pointer hover:bg-muted/50" - > - - - - {store.name} - - {new Date(store.created_at * 1000).toLocaleString()} - - {fileCounts.completed} - {fileCounts.cancelled} - {fileCounts.failed} - {fileCounts.in_progress} - {fileCounts.total} - {store.usage_bytes} - {providerId} - {providerDbId} - - ); - })} - -
+ const searchLower = searchTerm.toLowerCase(); + return ( + store.id.toLowerCase().includes(searchLower) || + (store.name && store.name.toLowerCase().includes(searchLower)) || + (store.metadata?.provider_id && + String(store.metadata.provider_id) + .toLowerCase() + .includes(searchLower)) || + (store.metadata?.provider_vector_db_id && + String(store.metadata.provider_vector_db_id) + .toLowerCase() + .includes(searchLower)) + ); + }); + + return ( +
+ {/* Search Bar */} +
+ + setSearchTerm(e.target.value)} + className="pl-10" + /> +
+ +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + Actions + + + + {filteredStores.map(store => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + + router.push(`/logs/vector-stores/${store.id}`) + } + className="cursor-pointer hover:bg-muted/50" + > + + + + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + +
+ + +
+
+
+ ); + })} +
+
+
); }; return (
-

Vector Stores

+
+

Vector Stores

+ +
{renderContent()} + + {/* Create Vector Store Modal */} + {showVectorStoreModal && ( +
+
+
+

+ {editingStore ? "Edit Vector Store" : "Create New Vector Store"} +

+ +
+
+ +
+
+
+ )}
); } diff --git a/src/llama_stack_ui/bin/cli.js b/src/llama_stack_ui/bin/cli.js new file mode 100755 index 000000000..6069d2f22 --- /dev/null +++ b/src/llama_stack_ui/bin/cli.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node + +const { spawn } = require('child_process'); +const path = require('path'); + +const port = process.env.LLAMA_STACK_UI_PORT || 8322; +const uiDir = path.resolve(__dirname, '..'); +const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js'); +const serverDir = path.dirname(serverPath); + +console.log(`Starting Llama Stack UI on http://localhost:${port}`); + +const child = spawn(process.execPath, [serverPath], { + cwd: serverDir, + stdio: 'inherit', + env: { + ...process.env, + PORT: port, + }, +}); + +process.on('SIGINT', () => { + child.kill('SIGINT'); + process.exit(0); +}); + +process.on('SIGTERM', () => { + child.kill('SIGTERM'); + process.exit(0); +}); + +child.on('exit', (code) => { + process.exit(code); +}); diff --git a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx index 458a5f942..70e0e4e66 100644 --- a/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx +++ b/src/llama_stack_ui/components/prompts/prompt-editor.test.tsx @@ -2,7 +2,7 @@ import React from "react"; import { render, screen, fireEvent } from "@testing-library/react"; import "@testing-library/jest-dom"; import { PromptEditor } from "./prompt-editor"; -import type { Prompt, PromptFormData } from "./types"; +import type { Prompt } from "./types"; describe("PromptEditor", () => { const mockOnSave = jest.fn(); diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx index 08f90ac0d..78bec8147 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.test.tsx @@ -12,6 +12,20 @@ jest.mock("next/navigation", () => ({ }), })); +// Mock NextAuth +jest.mock("next-auth/react", () => ({ + useSession: () => ({ + data: { + accessToken: "mock-access-token", + user: { + id: "mock-user-id", + email: "test@example.com", + }, + }, + status: "authenticated", + }), +})); + describe("VectorStoreDetailView", () => { const defaultProps = { store: null, diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx index d3d0fa249..f5b6281e7 100644 --- a/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx +++ b/src/llama_stack_ui/components/vector-stores/vector-store-detail.tsx @@ -1,16 +1,18 @@ "use client"; import { useRouter } from "next/navigation"; +import { useState, useEffect } from "react"; import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; import { Skeleton } from "@/components/ui/skeleton"; import { Button } from "@/components/ui/button"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import { Edit2, Trash2, X } from "lucide-react"; import { DetailLoadingView, DetailErrorView, DetailNotFoundView, - DetailLayout, PropertiesCard, PropertyItem, } from "@/components/layout/detail-layout"; @@ -23,6 +25,7 @@ import { TableHeader, TableRow, } from "@/components/ui/table"; +import { 
VectorStoreEditor, VectorStoreFormData } from "./vector-store-editor"; interface VectorStoreDetailViewProps { store: VectorStore | null; @@ -43,21 +46,122 @@ export function VectorStoreDetailView({ errorFiles, id, }: VectorStoreDetailViewProps) { - const title = "Vector Store Details"; const router = useRouter(); + const client = useAuthClient(); + const [isDeleting, setIsDeleting] = useState(false); + const [showEditModal, setShowEditModal] = useState(false); + const [modalError, setModalError] = useState(null); + const [showSuccessState, setShowSuccessState] = useState(false); + + // Handle ESC key to close modal + useEffect(() => { + const handleEscape = (event: KeyboardEvent) => { + if (event.key === "Escape" && showEditModal) { + handleCancel(); + } + }; + + document.addEventListener("keydown", handleEscape); + return () => document.removeEventListener("keydown", handleEscape); + }, [showEditModal]); const handleFileClick = (fileId: string) => { router.push(`/logs/vector-stores/${id}/files/${fileId}`); }; + const handleEditVectorStore = () => { + setShowEditModal(true); + setModalError(null); + setShowSuccessState(false); + }; + + const handleCancel = () => { + setShowEditModal(false); + setModalError(null); + setShowSuccessState(false); + }; + + const handleSaveVectorStore = async (formData: VectorStoreFormData) => { + try { + setModalError(null); + + // Update existing vector store (same logic as list page) + const updateParams: { + name?: string; + extra_body?: Record; + } = {}; + + // Only include fields that have changed or are provided + if (formData.name && formData.name !== store?.name) { + updateParams.name = formData.name; + } + + // Add all parameters to extra_body (except provider_id which can't be changed) + const extraBody: Record = {}; + if (formData.embedding_model) { + extraBody.embedding_model = formData.embedding_model; + } + if (formData.embedding_dimension) { + extraBody.embedding_dimension = formData.embedding_dimension; + } + + if (Object.keys(extraBody).length > 0) { + updateParams.extra_body = extraBody; + } + + await client.vectorStores.update(id, updateParams); + + // Show success state + setShowSuccessState(true); + setModalError( + "✅ Vector store updated successfully! You can close this modal and refresh the page to see changes." + ); + } catch (err: unknown) { + console.error("Failed to update vector store:", err); + const errorMessage = + err instanceof Error ? err.message : "Failed to update vector store"; + setModalError(errorMessage); + } + }; + + const handleDeleteVectorStore = async () => { + if ( + !confirm( + "Are you sure you want to delete this vector store? This action cannot be undone." + ) + ) { + return; + } + + setIsDeleting(true); + + try { + await client.vectorStores.delete(id); + // Redirect to the vector stores list after successful deletion + router.push("/logs/vector-stores"); + } catch (err: unknown) { + console.error("Failed to delete vector store:", err); + const errorMessage = err instanceof Error ? err.message : "Unknown error"; + alert(`Failed to delete vector store: ${errorMessage}`); + } finally { + setIsDeleting(false); + } + }; + if (errorStore) { - return ; + return ( + + ); } if (isLoadingStore) { - return ; + return ; } if (!store) { - return ; + return ; } const mainContent = ( @@ -138,6 +242,73 @@ export function VectorStoreDetailView({ ); return ( - + <> +
+

Vector Store Details

+
+ + +
+
+
+
{mainContent}
+
{sidebar}
+
+ + {/* Edit Vector Store Modal */} + {showEditModal && ( +
+
+
+

Edit Vector Store

+ +
+
+ +
+
+
+ )} + ); } diff --git a/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx new file mode 100644 index 000000000..719a2a9fd --- /dev/null +++ b/src/llama_stack_ui/components/vector-stores/vector-store-editor.tsx @@ -0,0 +1,235 @@ +"use client"; + +import { useState, useEffect } from "react"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { Card, CardContent } from "@/components/ui/card"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { Model } from "llama-stack-client/resources/models"; + +export interface VectorStoreFormData { + name: string; + embedding_model?: string; + embedding_dimension?: number; + provider_id?: string; +} + +interface VectorStoreEditorProps { + onSave: (formData: VectorStoreFormData) => Promise; + onCancel: () => void; + error?: string | null; + initialData?: VectorStoreFormData; + showSuccessState?: boolean; + isEditing?: boolean; +} + +export function VectorStoreEditor({ + onSave, + onCancel, + error, + initialData, + showSuccessState, + isEditing = false, +}: VectorStoreEditorProps) { + const client = useAuthClient(); + const [formData, setFormData] = useState( + initialData || { + name: "", + embedding_model: "", + embedding_dimension: 768, + provider_id: "", + } + ); + const [loading, setLoading] = useState(false); + const [models, setModels] = useState([]); + const [modelsLoading, setModelsLoading] = useState(true); + const [modelsError, setModelsError] = useState(null); + + const embeddingModels = models.filter( + model => model.custom_metadata?.model_type === "embedding" + ); + + useEffect(() => { + const fetchModels = async () => { + try { + setModelsLoading(true); + setModelsError(null); + const modelList = await client.models.list(); + setModels(modelList); + + // Set default embedding model if available + const embeddingModelsList = modelList.filter(model => { + return model.custom_metadata?.model_type === "embedding"; + }); + if (embeddingModelsList.length > 0 && !formData.embedding_model) { + setFormData(prev => ({ + ...prev, + embedding_model: embeddingModelsList[0].id, + })); + } + } catch (err) { + console.error("Failed to load models:", err); + setModelsError( + err instanceof Error ? err.message : "Failed to load models" + ); + } finally { + setModelsLoading(false); + } + }; + + fetchModels(); + }, [client]); + + const handleSubmit = async (e: React.FormEvent) => { + e.preventDefault(); + setLoading(true); + + try { + await onSave(formData); + } finally { + setLoading(false); + } + }; + + return ( + + +
+
+ + setFormData({ ...formData, name: e.target.value })} + placeholder="Enter vector store name" + required + /> +
+ +
+ + {modelsLoading ? ( +
+ Loading models... ({models.length} loaded) +
+ ) : modelsError ? ( +
+ Error: {modelsError} +
+ ) : embeddingModels.length === 0 ? ( +
+ No embedding models available ({models.length} total models) +
+ ) : ( + + )} + {formData.embedding_model && ( +

+ Dimension:{" "} + {embeddingModels.find(m => m.id === formData.embedding_model) + ?.custom_metadata?.embedding_dimension || "Unknown"} +

+ )} +
+ +
+ + + setFormData({ + ...formData, + embedding_dimension: parseInt(e.target.value) || 768, + }) + } + placeholder="768" + /> +
+ +
+ + + setFormData({ ...formData, provider_id: e.target.value }) + } + placeholder="e.g., faiss, chroma, sqlite" + disabled={isEditing} + /> + {isEditing && ( +

+ Provider ID cannot be changed after vector store creation +

+ )} +
+ + {error && ( +
+ {error} +
+ )} + +
+ {showSuccessState ? ( + + ) : ( + <> + + + + )} +
+
+
+
+ ); +} diff --git a/src/llama_stack_ui/lib/contents-api.ts b/src/llama_stack_ui/lib/contents-api.ts index f4920f3db..35456faff 100644 --- a/src/llama_stack_ui/lib/contents-api.ts +++ b/src/llama_stack_ui/lib/contents-api.ts @@ -34,9 +34,35 @@ export class ContentsAPI { async getFileContents( vectorStoreId: string, - fileId: string + fileId: string, + includeEmbeddings: boolean = true, + includeMetadata: boolean = true ): Promise { - return this.client.vectorStores.files.content(vectorStoreId, fileId); + try { + // Use query parameters to pass embeddings and metadata flags (OpenAI-compatible pattern) + const extraQuery: Record = {}; + if (includeEmbeddings) { + extraQuery.include_embeddings = true; + } + if (includeMetadata) { + extraQuery.include_metadata = true; + } + + const result = await this.client.vectorStores.files.content( + vectorStoreId, + fileId, + { + query: { + include_embeddings: includeEmbeddings, + include_metadata: includeMetadata, + }, + } + ); + return result; + } catch (error) { + console.error("ContentsAPI.getFileContents error:", error); + throw error; + } } async getContent( @@ -70,11 +96,15 @@ export class ContentsAPI { order?: string; after?: string; before?: string; + includeEmbeddings?: boolean; + includeMetadata?: boolean; } ): Promise { - const fileContents = await this.client.vectorStores.files.content( + const fileContents = await this.getFileContents( vectorStoreId, - fileId + fileId, + options?.includeEmbeddings ?? true, + options?.includeMetadata ?? true ); const contentItems: VectorStoreContentItem[] = []; @@ -82,7 +112,7 @@ export class ContentsAPI { const rawContent = content as Record; // Extract actual fields from the API response - const embedding = rawContent.embedding || undefined; + const embedding = rawContent.embedding as number[] | undefined; const created_timestamp = rawContent.created_timestamp || rawContent.created_at || diff --git a/src/llama_stack_ui/next.config.ts b/src/llama_stack_ui/next.config.ts index e9ffa3083..9f4a74eca 100644 --- a/src/llama_stack_ui/next.config.ts +++ b/src/llama_stack_ui/next.config.ts @@ -1,7 +1,13 @@ import type { NextConfig } from "next"; const nextConfig: NextConfig = { - /* config options here */ + typescript: { + ignoreBuildErrors: true, + }, + output: "standalone", + images: { + unoptimized: true, + }, }; export default nextConfig; diff --git a/src/llama_stack_ui/package-lock.json b/src/llama_stack_ui/package-lock.json index 14e34b720..aa8b2ac26 100644 --- a/src/llama_stack_ui/package-lock.json +++ b/src/llama_stack_ui/package-lock.json @@ -1,12 +1,13 @@ { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", + "license": "MIT", "dependencies": { "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", @@ -20,7 +21,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", @@ -9684,8 +9685,9 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.4.0-alpha.1", - "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f", + "version": "0.3.1", + "resolved": 
"https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz", + "integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", diff --git a/src/llama_stack_ui/package.json b/src/llama_stack_ui/package.json index fb7dbee75..41afc9a11 100644 --- a/src/llama_stack_ui/package.json +++ b/src/llama_stack_ui/package.json @@ -1,11 +1,31 @@ { - "name": "ui", - "version": "0.1.0", - "private": true, + "name": "llama-stack-ui", + "version": "0.4.0-alpha.4", + "description": "Web UI for Llama Stack", + "license": "MIT", + "author": "Llama Stack ", + "repository": { + "type": "git", + "url": "https://github.com/llamastack/llama-stack.git", + "directory": "llama_stack_ui" + }, + "bin": { + "llama-stack-ui": "bin/cli.js" + }, + "files": [ + "bin", + ".next", + "public", + "next.config.ts", + "instrumentation.ts", + "tsconfig.json", + "package.json" + ], "scripts": { "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}", - "build": "next build", + "build": "next build && node scripts/postbuild.js", "start": "next start", + "prepublishOnly": "npm run build", "lint": "next lint", "format": "prettier --write \"./**/*.{ts,tsx}\"", "format:check": "prettier --check \"./**/*.{ts,tsx}\"", @@ -25,7 +45,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", diff --git a/src/llama_stack_ui/scripts/postbuild.js b/src/llama_stack_ui/scripts/postbuild.js new file mode 100644 index 000000000..4b4dbdf5d --- /dev/null +++ b/src/llama_stack_ui/scripts/postbuild.js @@ -0,0 +1,40 @@ +const fs = require('fs'); +const path = require('path'); + +// Copy public directory to standalone +const publicSrc = path.join(__dirname, '..', 'public'); +const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public'); + +if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) { + console.log('Copying public directory to standalone...'); + copyDir(publicSrc, publicDest); +} + +// Copy .next/static to standalone +const staticSrc = path.join(__dirname, '..', '.next', 'static'); +const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static'); + +if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) { + console.log('Copying .next/static to standalone...'); + copyDir(staticSrc, staticDest); +} + +function copyDir(src, dest) { + if (!fs.existsSync(dest)) { + fs.mkdirSync(dest, { recursive: true }); + } + + const files = fs.readdirSync(src); + files.forEach((file) => { + const srcFile = path.join(src, file); + const destFile = path.join(dest, file); + + if (fs.statSync(srcFile).isDirectory()) { + copyDir(srcFile, destFile); + } else { + fs.copyFileSync(srcFile, destFile); + } + }); +} + +console.log('Postbuild complete!'); diff --git a/tests/integration/ci_matrix.json b/tests/integration/ci_matrix.json index 858176dff..43678e5c7 100644 --- a/tests/integration/ci_matrix.json +++ b/tests/integration/ci_matrix.json @@ -1,6 +1,7 @@ { "default": [ {"suite": "base", "setup": "ollama"}, + {"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"}, {"suite": "vision", "setup": "ollama-vision"}, {"suite": 
"responses", "setup": "gpt"}, {"suite": "base-vllm-subset", "setup": "vllm"} diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index d5e4c15f7..817180cfe 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session): raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG") # Handle server: format or server:: + # Also handles server::: format if config.startswith("server:"): - parts = config.split(":") - config_name = parts[1] - port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT)) + # Strip the "server:" prefix first + config_part = config[7:] # len("server:") == 7 + + # Check for :: (distro::runfile format) + if "::" in config_part: + config_name = config_part + port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT)) + else: + # Single colon format: either or : + parts = config_part.split(":") + config_name = parts[0] + port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT)) + base_url = f"http://localhost:{port}" force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1" @@ -323,7 +334,13 @@ def require_server(llama_stack_client): @pytest.fixture(scope="session") def openai_client(llama_stack_client, require_server): base_url = f"{llama_stack_client.base_url}/v1" - return OpenAI(base_url=base_url, api_key="fake") + client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0) + yield client + # Cleanup: close HTTP connections + try: + client.close() + except Exception: + pass @pytest.fixture(params=["openai_client", "client_with_models"]) diff --git a/tests/integration/recordings/README.md b/tests/integration/recordings/README.md index 621a07562..bdf4f532f 100644 --- a/tests/integration/recordings/README.md +++ b/tests/integration/recordings/README.md @@ -2,6 +2,10 @@ This directory contains recorded inference API responses used for deterministic testing without requiring live API access. +For more information, see the +[docs](https://llamastack.github.io/docs/contributing/testing/record-replay). +This README provides more technical information. 
+
 ## Structure
 
 - `responses/` - JSON files containing request/response pairs for inference operations
diff --git a/tests/integration/responses/fixtures/fixtures.py b/tests/integration/responses/fixtures/fixtures.py
index dbf67e138..b06117b98 100644
--- a/tests/integration/responses/fixtures/fixtures.py
+++ b/tests/integration/responses/fixtures/fixtures.py
@@ -115,7 +115,16 @@ def openai_client(base_url, api_key, provider):
         client = LlamaStackAsLibraryClient(config, skip_logger_removal=True)
-        return client
+        # The fixture is now a generator (it ends with `yield`), so the library
+        # client must be yielded too; a bare `return client` here would make
+        # pytest fail with "fixture function has no yield".
+        yield client
+        return
 
-    return OpenAI(
+    client = OpenAI(
         base_url=base_url,
         api_key=api_key,
+        max_retries=0,
+        timeout=30.0,
     )
+    yield client
+    # Cleanup: close HTTP connections
+    try:
+        client.close()
+    except Exception:
+        pass
diff --git a/tests/integration/responses/test_conversation_responses.py b/tests/integration/responses/test_conversation_responses.py
index ef7ea7c4e..babb77793 100644
--- a/tests/integration/responses/test_conversation_responses.py
+++ b/tests/integration/responses/test_conversation_responses.py
@@ -65,8 +65,14 @@ class TestConversationResponses:
         conversation_items = openai_client.conversations.items.list(conversation.id)
         assert len(conversation_items.data) >= 4  # 2 user + 2 assistant messages
 
+    @pytest.mark.timeout(60, method="thread")
     def test_conversation_context_loading(self, openai_client, text_model_id):
-        """Test that conversation context is properly loaded for responses."""
+        """Test that conversation context is properly loaded for responses.
+
+        Note: the 60s timeout works around a CI-specific deadlock in pytest/OpenAI client/httpx
+        after 25+ tests; the test hangs before the first HTTP request yet runs fine locally.
+        Still to investigate: connection pool exhaustion or event loop state.
+        """
         conversation = openai_client.conversations.create(
             items=[
                 {"type": "message", "role": "user", "content": "My name is Alice.
I like to eat apples."},
diff --git a/tests/integration/suites.py b/tests/integration/suites.py
index 0cec66afe..7689657b4 100644
--- a/tests/integration/suites.py
+++ b/tests/integration/suites.py
@@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
             "embedding_model": "ollama/nomic-embed-text:v1.5",
         },
     ),
+    "ollama-postgres": Setup(
+        name="ollama-postgres",
+        description="Server-mode tests with Postgres-backed persistence",
+        env={
+            "OLLAMA_URL": "http://0.0.0.0:11434",
+            "SAFETY_MODEL": "ollama/llama-guard3:1b",
+            "POSTGRES_HOST": "127.0.0.1",
+            "POSTGRES_PORT": "5432",
+            "POSTGRES_DB": "llamastack",
+            "POSTGRES_USER": "llamastack",
+            "POSTGRES_PASSWORD": "llamastack",
+            "LLAMA_STACK_LOGGING": "openai_responses=info",
+        },
+        defaults={
+            "text_model": "ollama/llama3.2:3b-instruct-fp16",
+            "embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
+            "safety_model": "ollama/llama-guard3:1b",
+            "safety_shield": "llama-guard",
+        },
+    ),
     "vllm": Setup(
         name="vllm",
         description="vLLM provider with a text model",
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 20f9d2978..1043d4903 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -11,6 +11,7 @@ import pytest
 from llama_stack_client import BadRequestError
 from openai import BadRequestError as OpenAIBadRequestError
 
+from llama_stack.apis.files import ExpiresAfter
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.log import get_logger
@@ -1604,3 +1605,98 @@ def test_openai_vector_store_embedding_config_from_metadata(
     assert "metadata_config_store" in store_names
 
     assert "consistent_config_store" in store_names
+
+
+@vector_provider_wrapper
+def test_openai_vector_store_file_contents_with_extra_query(
+    compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
+):
+    """Test that the vector store file contents endpoint supports the extra_query parameter."""
+    skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
+    compat_client = compat_client_with_empty_stores
+
+    # Create a vector store
+    vector_store = compat_client.vector_stores.create(
+        name="test_extra_query_store",
+        extra_body={
+            "embedding_model": embedding_model_id,
+            "provider_id": vector_io_provider_id,
+        },
+    )
+
+    # Create and attach a file
+    test_content = b"This is test content for extra_query validation."
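+    # Upload via the Files API with a 24-hour expiry (ExpiresAfter, 86400s) so CI uploads do not accumulate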
+ with BytesIO(test_content) as file_buffer: + file_buffer.name = "test_extra_query.txt" + file = compat_client.files.create( + file=file_buffer, + purpose="assistants", + expires_after=ExpiresAfter(anchor="created_at", seconds=86400), + ) + + file_attach_response = compat_client.vector_stores.files.create( + vector_store_id=vector_store.id, + file_id=file.id, + extra_body={"embedding_model": embedding_model_id}, + ) + assert file_attach_response.status == "completed" + + # Wait for processing + time.sleep(2) + + # Test that extra_query parameter is accepted and processed + content_with_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + extra_query={"include_embeddings": True, "include_metadata": True}, + ) + + # Test without extra_query for comparison + content_without_extra_query = compat_client.vector_stores.files.content( + vector_store_id=vector_store.id, + file_id=file.id, + ) + + # Validate that both calls succeed + assert content_with_extra_query is not None + assert content_without_extra_query is not None + assert len(content_with_extra_query.data) > 0 + assert len(content_without_extra_query.data) > 0 + + # Validate that extra_query parameter is processed correctly + # Both should have the embedding/metadata fields available (may be None based on flags) + first_chunk_with_flags = content_with_extra_query.data[0] + first_chunk_without_flags = content_without_extra_query.data[0] + + # The key validation: extra_query fields are present in the response + # Handle both dict and object responses (different clients may return different formats) + def has_field(obj, field): + if isinstance(obj, dict): + return field in obj + else: + return hasattr(obj, field) + + # Validate that all expected fields are present in both responses + expected_fields = ["embedding", "chunk_metadata", "metadata", "text"] + for field in expected_fields: + assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query" + assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query" + + # Validate content is the same + def get_field(obj, field): + if isinstance(obj, dict): + return obj[field] + else: + return getattr(obj, field) + + assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8") + assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8") + + with_flags_embedding = get_field(first_chunk_with_flags, "embedding") + without_flags_embedding = get_field(first_chunk_without_flags, "embedding") + + # Validate that embeddings are included when requested and excluded when not requested + assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True" + assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list" + assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False" diff --git a/tests/unit/core/routers/test_vector_io.py b/tests/unit/core/routers/test_vector_io.py index dd3246cb3..f9bd84a37 100644 --- a/tests/unit/core/routers/test_vector_io.py +++ b/tests/unit/core/routers/test_vector_io.py @@ -55,3 +55,65 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error with pytest.raises(ValueError, match="Multiple vector_io providers available"): await router.openai_create_vector_store(request) + + +async def test_update_vector_store_provider_id_change_fails(): + """Test that updating 
a vector store with a different provider_id fails with a clear error."""
+    mock_routing_table = Mock()
+
+    # Mock an existing vector store with provider_id "inline::faiss"
+    mock_existing_store = Mock()
+    mock_existing_store.provider_id = "inline::faiss"
+    mock_existing_store.identifier = "vs_123"
+
+    mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
+    mock_routing_table.get_provider_impl = AsyncMock(
+        return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
+    )
+
+    router = VectorIORouter(mock_routing_table)
+
+    # Try to update with a different provider_id in metadata - this should fail
+    with pytest.raises(ValueError, match="provider_id cannot be changed after vector store creation"):
+        await router.openai_update_vector_store(
+            vector_store_id="vs_123",
+            name="updated_name",
+            metadata={"provider_id": "inline::sqlite"},  # Different provider_id
+        )
+
+    # Verify the existing store was looked up to check provider_id
+    mock_routing_table.get_object_by_identifier.assert_called_once_with("vector_store", "vs_123")
+
+    # Provider should not be called since validation failed
+    mock_routing_table.get_provider_impl.assert_not_called()
+
+
+async def test_update_vector_store_same_provider_id_succeeds():
+    """Test that updating a vector store with the same provider_id succeeds."""
+    mock_routing_table = Mock()
+
+    # Mock an existing vector store with provider_id "inline::faiss"
+    mock_existing_store = Mock()
+    mock_existing_store.provider_id = "inline::faiss"
+    mock_existing_store.identifier = "vs_123"
+
+    mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
+    mock_routing_table.get_provider_impl = AsyncMock(
+        return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
+    )
+
+    router = VectorIORouter(mock_routing_table)
+
+    # Update with same provider_id should succeed
+    await router.openai_update_vector_store(
+        vector_store_id="vs_123",
+        name="updated_name",
+        metadata={"provider_id": "inline::faiss"},  # Same provider_id
+    )
+
+    # Verify the provider update method was called
+    mock_routing_table.get_provider_impl.assert_called_once_with("vs_123")
+    provider = await mock_routing_table.get_provider_impl("vs_123")
+    provider.openai_update_vector_store.assert_called_once_with(
+        vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"}
+    )
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index f36c8c181..0303a6ded 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -104,12 +104,18 @@ async def test_paginated_response_url_setting():
 
     route_handler = create_dynamic_typed_route(mock_api_method, "get", "/test/route")
 
-    # Mock minimal request
+    # Mock minimal request with proper state object
     request = MagicMock()
     request.scope = {"user_attributes": {}, "principal": ""}
     request.headers = {}
     request.body = AsyncMock(return_value=b"")
 
+    # Create a simple state object without auto-generating attributes
+    class MockState:
+        pass
+
+    request.state = MockState()
+
     result = await route_handler(request)
 
     assert isinstance(result, PaginatedResponse)
diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py
index 00669b698..d7ba0dc89 100644
--- a/tests/unit/utils/sqlstore/test_sqlstore.py
+++ b/tests/unit/utils/sqlstore/test_sqlstore.py
@@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory
 
 import pytest
 
-from llama_stack.providers.utils.sqlstore.api
import ColumnType +from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig @@ -65,6 +65,38 @@ async def test_sqlite_sqlstore(): assert result.has_more is False +async def test_sqlstore_upsert_support(): + with TemporaryDirectory() as tmp_dir: + db_path = tmp_dir + "/upsert.db" + store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path)) + + await store.create_table( + "items", + { + "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True), + "value": ColumnType.STRING, + "updated_at": ColumnType.INTEGER, + }, + ) + + await store.upsert( + table="items", + data={"id": "item_1", "value": "first", "updated_at": 1}, + conflict_columns=["id"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "first", "updated_at": 1} + + await store.upsert( + table="items", + data={"id": "item_1", "value": "second", "updated_at": 2}, + conflict_columns=["id"], + update_columns=["value", "updated_at"], + ) + row = await store.fetch_one("items", {"id": "item_1"}) + assert row == {"id": "item_1", "value": "second", "updated_at": 2} + + async def test_sqlstore_pagination_basic(): """Test basic pagination functionality at the SQL store level.""" with TemporaryDirectory() as tmp_dir: diff --git a/uv.lock b/uv.lock index ba9a862a3..f1808f005 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -2166,7 +2166,7 @@ test = [ { name = "milvus-lite", specifier = ">=2.5.0" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, { name = "pymilvus", specifier = ">=2.6.1" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "qdrant-client" }, { name = "requests" }, { name = "sqlalchemy" }, @@ -2219,7 +2219,7 @@ unit = [ { name = "moto", extras = ["s3"], specifier = ">=5.1.10" }, { name = "ollama" }, { name = "psycopg2-binary", specifier = ">=2.9.0" }, - { name = "pypdf" }, + { name = "pypdf", specifier = ">=6.1.3" }, { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "sqlite-vec" }, @@ -3973,11 +3973,11 @@ wheels = [ [[package]] name = "pypdf" -version = "5.9.0" +version = "6.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/3a/584b97a228950ed85aec97c811c68473d9b8d149e6a8c155668287cf1a28/pypdf-5.9.0.tar.gz", hash = "sha256:30f67a614d558e495e1fbb157ba58c1de91ffc1718f5e0dfeb82a029233890a1", size = 5035118, upload-time = "2025-07-27T14:04:52.364Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/d9/6cff57c80a6963e7dd183bf09e9f21604a77716644b1e580e97b259f7612/pypdf-5.9.0-py3-none-any.whl", hash = "sha256:be10a4c54202f46d9daceaa8788be07aa8cd5ea8c25c529c50dd509206382c35", size = 313193, upload-time = "2025-07-27T14:04:50.53Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = "sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" }, ] [[package]]