Merge branch 'main' into feat/add-dana-agent-provider-stub

This commit is contained in:
Zooey Nguyen 2025-11-12 19:46:26 -08:00 committed by GitHub
commit 0d038391f1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 2164 additions and 478 deletions

View file

@ -39,6 +39,32 @@ runs:
if: ${{ inputs.setup == 'vllm' && inputs.inference-mode == 'record' }}
uses: ./.github/actions/setup-vllm
- name: Start Postgres service
if: ${{ contains(inputs.setup, 'postgres') }}
shell: bash
run: |
sudo docker rm -f postgres-ci || true
sudo docker run -d --name postgres-ci \
-e POSTGRES_USER=llamastack \
-e POSTGRES_PASSWORD=llamastack \
-e POSTGRES_DB=llamastack \
-p 5432:5432 \
postgres:16
echo "Waiting for Postgres to become ready..."
for i in {1..30}; do
if sudo docker exec postgres-ci pg_isready -U llamastack -d llamastack >/dev/null 2>&1; then
echo "Postgres is ready"
break
fi
if [ "$i" -eq 30 ]; then
echo "Postgres failed to start in time"
sudo docker logs postgres-ci || true
exit 1
fi
sleep 2
done
- name: Build Llama Stack
shell: bash
run: |

View file

@ -66,12 +66,12 @@ jobs:
run-replay-mode-tests:
needs: generate-matrix
runs-on: ubuntu-latest
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client-type, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
name: ${{ format('Integration Tests ({0}, {1}, {2}, client={3}, {4})', matrix.client, matrix.config.setup, matrix.python-version, matrix.client-version, matrix.config.suite) }}
strategy:
fail-fast: false
matrix:
client-type: [library, docker, server]
client: [library, docker, server]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@ -84,6 +84,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Setup test environment
if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
uses: ./.github/actions/setup-test-environment
with:
python-version: ${{ matrix.python-version }}
@ -93,11 +94,16 @@ jobs:
inference-mode: 'replay'
- name: Run tests
if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
uses: ./.github/actions/run-and-record-tests
env:
OPENAI_API_KEY: dummy
with:
stack-config: ${{ matrix.client-type == 'library' && 'ci-tests' || matrix.client-type == 'server' && 'server:ci-tests' || 'docker:ci-tests' }}
stack-config: >-
${{ matrix.config.stack_config
|| (matrix.client == 'library' && 'ci-tests')
|| (matrix.client == 'server' && 'server:ci-tests')
|| 'docker:ci-tests' }}
setup: ${{ matrix.config.setup }}
inference-mode: 'replay'
suite: ${{ matrix.config.suite }}

View file

@ -463,6 +463,12 @@ resources:
settings:
license: MIT
unwrap_response_fields: [data]
file_header: |
Copyright (c) Meta Platforms, Inc. and affiliates.
All rights reserved.
This source code is licensed under the terms described in the LICENSE file in
the root directory of this source tree.
openapi:
transformations:

View file

@ -2691,7 +2691,8 @@ paths:
responses:
'200':
description: >-
A VectorStoreFileContentResponse representing the file contents.
File contents, optionally with embeddings and metadata based on query
parameters.
content:
application/json:
schema:
@ -2726,6 +2727,20 @@ paths:
required: true
schema:
type: string
- name: include_embeddings
in: query
description: >-
Whether to include embedding vectors in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
- name: include_metadata
in: query
description: >-
Whether to include chunk metadata in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
deprecated: false
/v1/vector_stores/{vector_store_id}/search:
post:
@ -10091,6 +10106,8 @@ components:
title: VectorStoreFileDeleteResponse
description: >-
Response from deleting a vector store file.
bool:
type: boolean
VectorStoreContent:
type: object
properties:
@ -10102,6 +10119,26 @@ components:
text:
type: string
description: The actual text content
embedding:
type: array
items:
type: number
description: >-
Optional embedding vector for this content chunk
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: Optional chunk metadata
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Optional user-defined metadata
additionalProperties: false
required:
- type
@ -10125,6 +10162,7 @@ components:
description: Parsed content of the file
has_more:
type: boolean
default: false
description: >-
Indicates if there are more content pages to fetch
next_page:

View file

@ -221,7 +221,15 @@ models:
```
A Model is an instance of a "Resource" (see [Concepts](../concepts/)) and is associated with a specific inference provider (in this case, the provider with identifier `ollama`). This is an instance of a "pre-registered" model. While we always encourage clients to register models before using them, some Stack servers may come up with a list of "already known and available" models.
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. Contrast it with `model_id` which is the identifier for the same model for Llama Stack's purposes. For example, you may want to name "llama3.2:vision-11b" as "image_captioning_model" when you use it in your Stack interactions. When omitted, the server will set `provider_model_id` to be the same as `model_id`.
What's with the `provider_model_id` field? This is an identifier for the model inside the provider's model catalog. The `model_id` field is provided for configuration purposes but is not used as part of the model identifier.
**Important:** Models are identified as `provider_id/provider_model_id` in the system and when making API calls. When `provider_model_id` is omitted, the server will set it to be the same as `model_id`.
Examples:
- Config: `model_id: llama3.2`, `provider_id: ollama`, `provider_model_id: null`
→ Access as: `ollama/llama3.2`
- Config: `model_id: my-llama`, `provider_id: vllm-inference`, `provider_model_id: llama-3-2-3b`
→ Access as: `vllm-inference/llama-3-2-3b` (the `model_id` is not used in the identifier)
If you need to register a model conditionally, for example only when specific environment variables are set, use the special `__disabled__` string as the default value in an environment variable substitution, as shown below:
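For illustration, here is a minimal sketch of that pattern; the variable and model names are placeholders rather than the exact snippet from the full page:

```yaml
models:
  - model_id: ${env.INFERENCE_MODEL:=__disabled__}
    provider_id: ollama
    provider_model_id: ${env.INFERENCE_MODEL:=__disabled__}
```

If `INFERENCE_MODEL` is unset, the substitution resolves to `__disabled__` and the server skips registering that entry.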

View file

@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack.
- **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions
- **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code
- **[Configuration Reference](./configuration.mdx)** - Configuration file format details
- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers

View file

@ -0,0 +1,109 @@
---
title: Llama Stack UI
description: Web-based user interface for interacting with Llama Stack servers
sidebar_label: Llama Stack UI
sidebar_position: 8
---
# Llama Stack UI
The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs.
## Features
- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity
- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows
- **Prompt Management**: Create and manage reusable prompts
## Prerequisites
You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend.
If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](./starting_llama_stack_server.mdx) guide.
## Running the UI
### Option 1: Using npx (Recommended for Quick Start)
The fastest way to get started is using `npx`:
```bash
npx llama-stack-ui
```
This will start the UI server on `http://localhost:8322` (default port).
### Option 2: Using Docker
Run the UI in a container:
```bash
docker run -p 8322:8322 llamastack/ui
```
Access the UI at `http://localhost:8322`.
## Environment Variables
The UI can be configured using the following environment variables:
| Variable | Description | Default |
|----------|-------------|---------|
| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` |
| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` |
If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables:
| Variable | Description | Default |
|----------|-------------|---------|
| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` |
| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - |
| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - |
### Setting Environment Variables
#### For npx:
```bash
LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
LLAMA_STACK_UI_PORT=8080 \
npx llama-stack-ui
```
#### For Docker:
```bash
docker run -p 8080:8080 \
-e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
-e LLAMA_STACK_UI_PORT=8080 \
llamastack/ui
```
## Using the UI
### Managing Resources
- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings
- **Prompts**: Create and manage reusable prompt templates
- **Chat Completions**: View history of chat interactions
- **Responses**: Browse detailed agent responses and tool calls
## Development
If you want to run the UI from source for development:
```bash
# From the project root
cd src/llama_stack_ui
# Install dependencies
npm install
# Set environment variables
export LLAMA_STACK_BACKEND_URL=http://localhost:8321
# Start the development server
npm run dev
```
The development server will start on `http://localhost:8322` with hot reloading enabled.

View file

@ -144,7 +144,7 @@ source .venv/bin/activate
```bash
uv venv client --python 3.12
source client/bin/activate
pip install llama-stack-client
uv pip install llama-stack-client
```
</TabItem>
</Tabs>

View file

@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = {
'distributions/importing_as_library',
'distributions/configuration',
'distributions/starting_llama_stack_server',
'distributions/llama_stack_ui',
{
type: 'category',
label: 'Self-Hosted Distributions',

View file

@ -2688,7 +2688,8 @@ paths:
responses:
'200':
description: >-
A VectorStoreFileContentResponse representing the file contents.
File contents, optionally with embeddings and metadata based on query
parameters.
content:
application/json:
schema:
@ -2723,6 +2724,20 @@ paths:
required: true
schema:
type: string
- name: include_embeddings
in: query
description: >-
Whether to include embedding vectors in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
- name: include_metadata
in: query
description: >-
Whether to include chunk metadata in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
deprecated: false
/v1/vector_stores/{vector_store_id}/search:
post:
@ -9375,6 +9390,8 @@ components:
title: VectorStoreFileDeleteResponse
description: >-
Response from deleting a vector store file.
bool:
type: boolean
VectorStoreContent:
type: object
properties:
@ -9386,6 +9403,26 @@ components:
text:
type: string
description: The actual text content
embedding:
type: array
items:
type: number
description: >-
Optional embedding vector for this content chunk
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: Optional chunk metadata
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Optional user-defined metadata
additionalProperties: false
required:
- type
@ -9409,6 +9446,7 @@ components:
description: Parsed content of the file
has_more:
type: boolean
default: false
description: >-
Indicates if there are more content pages to fetch
next_page:

View file

@ -2691,7 +2691,8 @@ paths:
responses:
'200':
description: >-
A VectorStoreFileContentResponse representing the file contents.
File contents, optionally with embeddings and metadata based on query
parameters.
content:
application/json:
schema:
@ -2726,6 +2727,20 @@ paths:
required: true
schema:
type: string
- name: include_embeddings
in: query
description: >-
Whether to include embedding vectors in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
- name: include_metadata
in: query
description: >-
Whether to include chunk metadata in the response.
required: false
schema:
$ref: '#/components/schemas/bool'
deprecated: false
/v1/vector_stores/{vector_store_id}/search:
post:
@ -10091,6 +10106,8 @@ components:
title: VectorStoreFileDeleteResponse
description: >-
Response from deleting a vector store file.
bool:
type: boolean
VectorStoreContent:
type: object
properties:
@ -10102,6 +10119,26 @@ components:
text:
type: string
description: The actual text content
embedding:
type: array
items:
type: number
description: >-
Optional embedding vector for this content chunk
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: Optional chunk metadata
metadata:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: Optional user-defined metadata
additionalProperties: false
required:
- type
@ -10125,6 +10162,7 @@ components:
description: Parsed content of the file
has_more:
type: boolean
default: false
description: >-
Indicates if there are more content pages to fetch
next_page:

View file

@ -112,7 +112,7 @@ unit = [
"aiosqlite",
"aiohttp",
"psycopg2-binary>=2.9.0",
"pypdf",
"pypdf>=6.1.3",
"mcp",
"chardet",
"sqlalchemy",
@ -135,7 +135,7 @@ test = [
"torchvision>=0.21.0",
"chardet",
"psycopg2-binary>=2.9.0",
"pypdf",
"pypdf>=6.1.3",
"mcp",
"datasets>=4.0.0",
"autoevals",

View file

@ -10,7 +10,7 @@
# the root directory of this source tree.
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from fastapi import Body
from fastapi import Body, Query
from pydantic import BaseModel, Field
from llama_stack.apis.common.tracing import telemetry_traceable
@ -224,10 +224,16 @@ class VectorStoreContent(BaseModel):
:param type: Content type, currently only "text" is supported
:param text: The actual text content
:param embedding: Optional embedding vector for this content chunk
:param chunk_metadata: Optional chunk metadata
:param metadata: Optional user-defined metadata
"""
type: Literal["text"]
text: str
embedding: list[float] | None = None
chunk_metadata: ChunkMetadata | None = None
metadata: dict[str, Any] | None = None
@json_schema_type
@ -280,6 +286,22 @@ class VectorStoreDeleteResponse(BaseModel):
deleted: bool = True
@json_schema_type
class VectorStoreFileContentResponse(BaseModel):
"""Represents the parsed content of a vector store file.
:param object: The object type, which is always `vector_store.file_content.page`
:param data: Parsed content of the file
:param has_more: Indicates if there are more content pages to fetch
:param next_page: The token for the next page, if any
"""
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
data: list[VectorStoreContent]
has_more: bool = False
next_page: str | None = None
@json_schema_type
class VectorStoreChunkingStrategyAuto(BaseModel):
"""Automatic chunking strategy for vector store files.
@ -395,22 +417,6 @@ class VectorStoreListFilesResponse(BaseModel):
has_more: bool = False
@json_schema_type
class VectorStoreFileContentResponse(BaseModel):
"""Represents the parsed content of a vector store file.
:param object: The object type, which is always `vector_store.file_content.page`
:param data: Parsed content of the file
:param has_more: Indicates if there are more content pages to fetch
:param next_page: The token for the next page, if any
"""
object: Literal["vector_store.file_content.page"] = "vector_store.file_content.page"
data: list[VectorStoreContent]
has_more: bool
next_page: str | None = None
@json_schema_type
class VectorStoreFileDeleteResponse(BaseModel):
"""Response from deleting a vector store file.
@ -732,12 +738,16 @@ class VectorIO(Protocol):
self,
vector_store_id: str,
file_id: str,
include_embeddings: Annotated[bool | None, Query(default=False)] = False,
include_metadata: Annotated[bool | None, Query(default=False)] = False,
) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file.
:param vector_store_id: The ID of the vector store containing the file to retrieve.
:param file_id: The ID of the file to retrieve.
:returns: A VectorStoreFileContentResponse representing the file contents.
:param include_embeddings: Whether to include embedding vectors in the response.
:param include_metadata: Whether to include chunk metadata in the response.
:returns: File contents, optionally with embeddings and metadata based on query parameters.
"""
...
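For context, the two new flags are plain query parameters on the file-contents endpoint, so a raw HTTP call is enough to exercise them. A minimal sketch, assuming the OpenAI-compatible path and placeholder IDs (neither is taken from this diff):

```python
import httpx

# Placeholder server URL and IDs; the path follows the OpenAI-compatible spec edited above.
resp = httpx.get(
    "http://localhost:8321/v1/vector_stores/vs_123/files/file_456/content",
    params={"include_embeddings": True, "include_metadata": True},
)
resp.raise_for_status()
for item in resp.json()["data"]:
    # "embedding" and "chunk_metadata" are populated only when the matching flag is set.
    print(item["text"], item.get("embedding"), item.get("chunk_metadata"))
```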

View file

@ -4,7 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import importlib.resources
import sys
from pydantic import BaseModel
@ -12,9 +11,6 @@ from termcolor import cprint
from llama_stack.core.datatypes import BuildConfig
from llama_stack.core.distribution import get_provider_registry
from llama_stack.core.external import load_external_apis
from llama_stack.core.utils.exec import run_command
from llama_stack.core.utils.image_types import LlamaStackImageType
from llama_stack.distributions.template import DistributionTemplate
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import Api
@ -101,64 +97,3 @@ def print_pip_install_help(config: BuildConfig):
for special_dep in special_deps:
cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
print()
def build_image(
build_config: BuildConfig,
image_name: str,
distro_or_config: str,
run_config: str | None = None,
):
container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
normal_deps, special_deps, external_provider_deps = get_provider_dependencies(build_config)
normal_deps += SERVER_DEPENDENCIES
if build_config.external_apis_dir:
external_apis = load_external_apis(build_config)
if external_apis:
for _, api_spec in external_apis.items():
normal_deps.extend(api_spec.pip_packages)
if build_config.image_type == LlamaStackImageType.CONTAINER.value:
script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
args = [
script,
"--distro-or-config",
distro_or_config,
"--image-name",
image_name,
"--container-base",
container_base,
"--normal-deps",
" ".join(normal_deps),
]
# When building from a config file (not a template), include the run config path in the
# build arguments
if run_config is not None:
args.extend(["--run-config", run_config])
else:
script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
args = [
script,
"--env-name",
str(image_name),
"--normal-deps",
" ".join(normal_deps),
]
# Always pass both arguments, even if empty, to maintain consistent positional arguments
if special_deps:
args.extend(["--optional-deps", "#".join(special_deps)])
if external_provider_deps:
args.extend(
["--external-provider-deps", "#".join(external_provider_deps)]
) # the script will install external provider module, get its deps, and install those too.
return_code = run_command(args)
if return_code != 0:
log.error(
f"Failed to build target {image_name} with return code {return_code}",
)
return return_code

View file

@ -203,16 +203,11 @@ class ConversationServiceImpl(Conversations):
"item_data": item_dict,
}
# TODO: Add support for upsert in sql_store, this will fail first if ID exists and then update
try:
await self.sql_store.insert(table="conversation_items", data=item_record)
except Exception:
# If insert fails due to ID conflict, update existing record
await self.sql_store.update(
table="conversation_items",
data={"created_at": created_at, "item_data": item_dict},
where={"id": item_id},
)
await self.sql_store.upsert(
table="conversation_items",
data=item_record,
conflict_columns=["id"],
)
created_items.append(item_dict)

View file

@ -389,6 +389,12 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
matched_func, path_params, route_path, webmethod = find_matching_route(options.method, path, self.route_impls)
body |= path_params
# Pass through params that aren't already handled as path params
if options.params:
extra_query_params = {k: v for k, v in options.params.items() if k not in path_params}
if extra_query_params:
body["extra_query"] = extra_query_params
body, field_names = self._handle_file_uploads(options, body)
body = self._convert_body(matched_func, body, exclude_params=set(field_names))
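Downstream, this lets OpenAI-style `extra_query` options reach the new vector-store query parameters when Llama Stack runs in library mode. A hedged sketch; the import path, method name, and IDs are assumptions based on this diff rather than verified API:

```python
from llama_stack.core.library_client import LlamaStackAsLibraryClient  # assumed import path

client = LlamaStackAsLibraryClient("ci-tests")  # hypothetical distro/config name
client.initialize()

# extra_query entries that are not path params are forwarded via body["extra_query"]
# (see the change above) and arrive server-side as include_embeddings / include_metadata.
page = client.vector_stores.files.content(
    file_id="file_456",        # placeholder ID
    vector_store_id="vs_123",  # placeholder ID
    extra_query={"include_embeddings": True, "include_metadata": True},
)
```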

View file

@ -247,6 +247,13 @@ class VectorIORouter(VectorIO):
metadata: dict[str, Any] | None = None,
) -> VectorStoreObject:
logger.debug(f"VectorIORouter.openai_update_vector_store: {vector_store_id}")
# Check if provider_id is being changed (not supported)
if metadata and "provider_id" in metadata:
current_store = await self.routing_table.get_object_by_identifier("vector_store", vector_store_id)
if current_store and current_store.provider_id != metadata["provider_id"]:
raise ValueError("provider_id cannot be changed after vector store creation")
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_update_vector_store(
vector_store_id=vector_store_id,
@ -338,12 +345,19 @@ class VectorIORouter(VectorIO):
self,
vector_store_id: str,
file_id: str,
include_embeddings: bool | None = False,
include_metadata: bool | None = False,
) -> VectorStoreFileContentResponse:
logger.debug(f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}")
provider = await self.routing_table.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
logger.debug(
f"VectorIORouter.openai_retrieve_vector_store_file_contents: {vector_store_id}, {file_id}, "
f"include_embeddings={include_embeddings}, include_metadata={include_metadata}"
)
return await self.routing_table.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
include_embeddings=include_embeddings,
include_metadata=include_metadata,
)
async def openai_update_vector_store_file(

View file

@ -195,12 +195,17 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
self,
vector_store_id: str,
file_id: str,
include_embeddings: bool | None = False,
include_metadata: bool | None = False,
) -> VectorStoreFileContentResponse:
await self.assert_action_allowed("read", "vector_store", vector_store_id)
provider = await self.get_provider_impl(vector_store_id)
return await provider.openai_retrieve_vector_store_file_contents(
vector_store_id=vector_store_id,
file_id=file_id,
include_embeddings=include_embeddings,
include_metadata=include_metadata,
)
async def openai_update_vector_store_file(

View file

@ -13,6 +13,5 @@ from ..starter.starter import get_distribution_template as get_starter_distribut
def get_distribution_template() -> DistributionTemplate:
template = get_starter_distribution_template(name="ci-tests")
template.description = "CI tests for Llama Stack"
template.run_configs.pop("run-with-postgres-store.yaml", None)
return template

View file

@ -0,0 +1,293 @@
version: 2
image_name: ci-tests
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
config:
api_key: ${env.AWS_BEDROCK_API_KEY:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard

View file

@ -165,20 +165,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
@ -237,10 +232,10 @@ providers:
config:
kvstore:
namespace: batches
backend: kv_postgres
backend: kv_default
storage:
backends:
kv_postgres:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
@ -248,7 +243,7 @@ storage:
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
@ -258,27 +253,44 @@ storage:
stores:
metadata:
namespace: registry
backend: kv_postgres
backend: kv_default
inference:
table_name: inference_store
backend: sql_postgres
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
backend: sql_default
prompts:
namespace: prompts
backend: kv_postgres
backend: kv_default
registered_resources:
models: []
shields: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard

View file

@ -165,20 +165,15 @@ providers:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
responses_store:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
@ -234,10 +229,10 @@ providers:
config:
kvstore:
namespace: batches
backend: kv_postgres
backend: kv_default
storage:
backends:
kv_postgres:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
@ -245,7 +240,7 @@ storage:
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_postgres:
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
@ -255,27 +250,44 @@ storage:
stores:
metadata:
namespace: registry
backend: kv_postgres
backend: kv_default
inference:
table_name: inference_store
backend: sql_postgres
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_postgres
backend: sql_default
prompts:
namespace: prompts
backend: kv_postgres
backend: kv_default
registered_resources:
models: []
shields: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard

View file

@ -17,11 +17,6 @@ from llama_stack.core.datatypes import (
ToolGroupInput,
VectorStoresConfig,
)
from llama_stack.core.storage.datatypes import (
InferenceStoreReference,
KVStoreReference,
SqlStoreReference,
)
from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.distributions.template import DistributionTemplate, RunConfigSettings
from llama_stack.providers.datatypes import RemoteProviderSpec
@ -157,10 +152,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
BuildProvider(provider_type="inline::reference"),
],
}
files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
files_provider = Provider(
provider_id="meta-reference-files",
provider_type="inline::localfs",
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
config=files_config,
)
embedding_provider = Provider(
provider_id="sentence-transformers",
@ -190,7 +186,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
),
]
postgres_config = PostgresSqlStoreConfig.sample_run_config()
postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
default_overrides = {
"inference": remote_inference_providers + [embedding_provider],
"vector_io": [
@ -247,6 +244,33 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
"files": [files_provider],
}
base_run_settings = RunConfigSettings(
provider_overrides=default_overrides,
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
vector_stores_config=VectorStoresConfig(
default_provider_id="faiss",
default_embedding_model=QualifiedModel(
provider_id="sentence-transformers",
model_id="nomic-ai/nomic-embed-text-v1.5",
),
),
safety_config=SafetyConfig(
default_shield_id="llama-guard",
),
)
postgres_run_settings = base_run_settings.model_copy(
update={
"storage_backends": {
"kv_default": postgres_kv_config,
"sql_default": postgres_sql_config,
}
},
deep=True,
)
return DistributionTemplate(
name=name,
distro_type="self_hosted",
@ -256,71 +280,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
providers=providers,
additional_pip_packages=list(set(PostgresSqlStoreConfig.pip_packages() + PostgresKVStoreConfig.pip_packages())),
run_configs={
"run.yaml": RunConfigSettings(
provider_overrides=default_overrides,
default_models=[],
default_tool_groups=default_tool_groups,
default_shields=default_shields,
vector_stores_config=VectorStoresConfig(
default_provider_id="faiss",
default_embedding_model=QualifiedModel(
provider_id="sentence-transformers",
model_id="nomic-ai/nomic-embed-text-v1.5",
),
),
safety_config=SafetyConfig(
default_shield_id="llama-guard",
),
),
"run-with-postgres-store.yaml": RunConfigSettings(
provider_overrides={
**default_overrides,
"agents": [
Provider(
provider_id="meta-reference",
provider_type="inline::meta-reference",
config=dict(
persistence_store=postgres_config,
responses_store=postgres_config,
),
)
],
"batches": [
Provider(
provider_id="reference",
provider_type="inline::reference",
config=dict(
kvstore=KVStoreReference(
backend="kv_postgres",
namespace="batches",
).model_dump(exclude_none=True),
),
)
],
},
storage_backends={
"kv_postgres": PostgresKVStoreConfig.sample_run_config(),
"sql_postgres": postgres_config,
},
storage_stores={
"metadata": KVStoreReference(
backend="kv_postgres",
namespace="registry",
).model_dump(exclude_none=True),
"inference": InferenceStoreReference(
backend="sql_postgres",
table_name="inference_store",
).model_dump(exclude_none=True),
"conversations": SqlStoreReference(
backend="sql_postgres",
table_name="openai_conversations",
).model_dump(exclude_none=True),
"prompts": KVStoreReference(
backend="kv_postgres",
namespace="prompts",
).model_dump(exclude_none=True),
},
),
"run.yaml": base_run_settings,
"run-with-postgres-store.yaml": postgres_run_settings,
},
run_config_env_vars={
"LLAMA_STACK_PORT": (

View file

@ -66,14 +66,6 @@ class InferenceStore:
},
)
if self.enable_write_queue:
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
for _ in range(self._num_writers):
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
logger.debug(
f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
)
async def shutdown(self) -> None:
if not self._worker_tasks:
return
@ -94,10 +86,29 @@ class InferenceStore:
if self.enable_write_queue and self._queue is not None:
await self._queue.join()
async def _ensure_workers_started(self) -> None:
"""Ensure the async write queue workers run on the current loop."""
if not self.enable_write_queue:
return
if self._queue is None:
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
logger.debug(
f"Inference store write queue created with max size {self._max_write_queue_size} "
f"and {self._num_writers} writers"
)
if not self._worker_tasks:
loop = asyncio.get_running_loop()
for _ in range(self._num_writers):
task = loop.create_task(self._worker_loop())
self._worker_tasks.append(task)
async def store_chat_completion(
self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
) -> None:
if self.enable_write_queue:
await self._ensure_workers_started()
if self._queue is None:
raise ValueError("Inference store is not initialized")
try:

View file

@ -11,6 +11,9 @@
from __future__ import annotations
import asyncio
from collections import defaultdict
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendConfig, StorageBackendType
from .api import KVStore
@ -53,45 +56,63 @@ class InmemoryKVStoreImpl(KVStore):
_KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
_KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}
_KVSTORE_LOCKS: defaultdict[tuple[str, str], asyncio.Lock] = defaultdict(asyncio.Lock)
def register_kvstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
"""Register the set of available KV store backends for reference resolution."""
global _KVSTORE_BACKENDS
global _KVSTORE_INSTANCES
global _KVSTORE_LOCKS
_KVSTORE_BACKENDS.clear()
_KVSTORE_INSTANCES.clear()
_KVSTORE_LOCKS.clear()
for name, cfg in backends.items():
_KVSTORE_BACKENDS[name] = cfg
async def kvstore_impl(reference: KVStoreReference) -> KVStore:
backend_name = reference.backend
cache_key = (backend_name, reference.namespace)
existing = _KVSTORE_INSTANCES.get(cache_key)
if existing:
return existing
backend_config = _KVSTORE_BACKENDS.get(backend_name)
if backend_config is None:
raise ValueError(f"Unknown KVStore backend '{backend_name}'. Registered backends: {sorted(_KVSTORE_BACKENDS)}")
config = backend_config.model_copy()
config.namespace = reference.namespace
lock = _KVSTORE_LOCKS[cache_key]
async with lock:
existing = _KVSTORE_INSTANCES.get(cache_key)
if existing:
return existing
if config.type == StorageBackendType.KV_REDIS.value:
from .redis import RedisKVStoreImpl
config = backend_config.model_copy()
config.namespace = reference.namespace
impl = RedisKVStoreImpl(config)
elif config.type == StorageBackendType.KV_SQLITE.value:
from .sqlite import SqliteKVStoreImpl
if config.type == StorageBackendType.KV_REDIS.value:
from .redis import RedisKVStoreImpl
impl = SqliteKVStoreImpl(config)
elif config.type == StorageBackendType.KV_POSTGRES.value:
from .postgres import PostgresKVStoreImpl
impl = RedisKVStoreImpl(config)
elif config.type == StorageBackendType.KV_SQLITE.value:
from .sqlite import SqliteKVStoreImpl
impl = PostgresKVStoreImpl(config)
elif config.type == StorageBackendType.KV_MONGODB.value:
from .mongodb import MongoDBKVStoreImpl
impl = SqliteKVStoreImpl(config)
elif config.type == StorageBackendType.KV_POSTGRES.value:
from .postgres import PostgresKVStoreImpl
impl = MongoDBKVStoreImpl(config)
else:
raise ValueError(f"Unknown kvstore type {config.type}")
impl = PostgresKVStoreImpl(config)
elif config.type == StorageBackendType.KV_MONGODB.value:
from .mongodb import MongoDBKVStoreImpl
await impl.initialize()
return impl
impl = MongoDBKVStoreImpl(config)
else:
raise ValueError(f"Unknown kvstore type {config.type}")
await impl.initialize()
_KVSTORE_INSTANCES[cache_key] = impl
return impl

View file

@ -704,34 +704,35 @@ class OpenAIVectorStoreMixin(ABC):
# Unknown filter type, default to no match
raise ValueError(f"Unsupported filter type: {filter_type}")
def _chunk_to_vector_store_content(self, chunk: Chunk) -> list[VectorStoreContent]:
# content is InterleavedContent
def _chunk_to_vector_store_content(
self, chunk: Chunk, include_embeddings: bool = False, include_metadata: bool = False
) -> list[VectorStoreContent]:
def extract_fields() -> dict:
"""Extract embedding and metadata fields from chunk based on include flags."""
return {
"embedding": chunk.embedding if include_embeddings else None,
"chunk_metadata": chunk.chunk_metadata if include_metadata else None,
"metadata": chunk.metadata if include_metadata else None,
}
fields = extract_fields()
if isinstance(chunk.content, str):
content = [
VectorStoreContent(
type="text",
text=chunk.content,
)
]
content_item = VectorStoreContent(type="text", text=chunk.content, **fields)
content = [content_item]
elif isinstance(chunk.content, list):
# TODO: Add support for other types of content
content = [
VectorStoreContent(
type="text",
text=item.text,
)
for item in chunk.content
if item.type == "text"
]
content = []
for item in chunk.content:
if item.type == "text":
content_item = VectorStoreContent(type="text", text=item.text, **fields)
content.append(content_item)
else:
if chunk.content.type != "text":
raise ValueError(f"Unsupported content type: {chunk.content.type}")
content = [
VectorStoreContent(
type="text",
text=chunk.content.text,
)
]
content_item = VectorStoreContent(type="text", text=chunk.content.text, **fields)
content = [content_item]
return content
async def openai_attach_file_to_vector_store(
@ -820,13 +821,12 @@ class OpenAIVectorStoreMixin(ABC):
message=str(e),
)
# Create OpenAI vector store file metadata
# Save vector store file to persistent storage AFTER insert_chunks
# so that chunks include the embeddings that were generated
file_info = vector_store_file_object.model_dump(exclude={"last_error"})
file_info["filename"] = file_response.filename if file_response else ""
# Save vector store file to persistent storage (provider-specific)
dict_chunks = [c.model_dump() for c in chunks]
# This should be updated to include chunk_id
await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks)
# Update file_ids and file_counts in vector store metadata
@ -921,21 +921,27 @@ class OpenAIVectorStoreMixin(ABC):
self,
vector_store_id: str,
file_id: str,
include_embeddings: bool | None = False,
include_metadata: bool | None = False,
) -> VectorStoreFileContentResponse:
"""Retrieves the contents of a vector store file."""
if vector_store_id not in self.openai_vector_stores:
raise VectorStoreNotFoundError(vector_store_id)
# include_embeddings and include_metadata are provided directly as function parameters (mapped from query params)
dict_chunks = await self._load_openai_vector_store_file_contents(vector_store_id, file_id)
chunks = [Chunk.model_validate(c) for c in dict_chunks]
content = []
for chunk in chunks:
content.extend(self._chunk_to_vector_store_content(chunk))
content.extend(
self._chunk_to_vector_store_content(
chunk, include_embeddings=include_embeddings or False, include_metadata=include_metadata or False
)
)
return VectorStoreFileContentResponse(
object="vector_store.file_content.page",
data=content,
has_more=False,
next_page=None,
)
async def openai_update_vector_store_file(

View file

@ -3,8 +3,6 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import asyncio
from typing import Any
from llama_stack.apis.agents import (
Order,
@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import (
)
from llama_stack.apis.inference import OpenAIMessageParam
from llama_stack.core.datatypes import AccessRule
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
from llama_stack.log import get_logger
from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
from ..sqlstore.sqlstore import sqlstore_impl
logger = get_logger(name=__name__, category="openai_responses")
@ -55,28 +53,12 @@ class ResponsesStore:
self.policy = policy
self.sql_store = None
self.enable_write_queue = True
# Async write queue and worker control
self._queue: (
asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None
) = None
self._worker_tasks: list[asyncio.Task[Any]] = []
self._max_write_queue_size: int = self.reference.max_write_queue_size
self._num_writers: int = max(1, self.reference.num_writers)
async def initialize(self):
"""Create the necessary tables if they don't exist."""
base_store = sqlstore_impl(self.reference)
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
# Disable write queue for SQLite since WAL mode handles concurrency
# Keep it enabled for other backends (like Postgres) for performance
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
self.enable_write_queue = False
logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
await self.sql_store.create_table(
"openai_responses",
{
@ -95,33 +77,12 @@ class ResponsesStore:
},
)
if self.enable_write_queue:
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
for _ in range(self._num_writers):
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
logger.debug(
f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
)
async def shutdown(self) -> None:
if not self._worker_tasks:
return
if self._queue is not None:
await self._queue.join()
for t in self._worker_tasks:
if not t.done():
t.cancel()
for t in self._worker_tasks:
try:
await t
except asyncio.CancelledError:
pass
self._worker_tasks.clear()
return
async def flush(self) -> None:
"""Wait for all queued writes to complete. Useful for testing."""
if self.enable_write_queue and self._queue is not None:
await self._queue.join()
"""Maintained for compatibility; no-op now that writes are synchronous."""
return
async def store_response_object(
self,
@ -129,31 +90,7 @@ class ResponsesStore:
input: list[OpenAIResponseInput],
messages: list[OpenAIMessageParam],
) -> None:
if self.enable_write_queue:
if self._queue is None:
raise ValueError("Responses store is not initialized")
try:
self._queue.put_nowait((response_object, input, messages))
except asyncio.QueueFull:
logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '<unknown>')}")
await self._queue.put((response_object, input, messages))
else:
await self._write_response_object(response_object, input, messages)
async def _worker_loop(self) -> None:
assert self._queue is not None
while True:
try:
item = await self._queue.get()
except asyncio.CancelledError:
break
response_object, input, messages = item
try:
await self._write_response_object(response_object, input, messages)
except Exception as e: # noqa: BLE001
logger.error(f"Error writing response object: {e}")
finally:
self._queue.task_done()
await self._write_response_object(response_object, input, messages)
async def _write_response_object(
self,
@ -315,19 +252,12 @@ class ResponsesStore:
# Serialize messages to dict format for JSON storage
messages_data = [msg.model_dump() for msg in messages]
# Upsert: try insert first, update if exists
try:
await self.sql_store.insert(
table="conversation_messages",
data={"conversation_id": conversation_id, "messages": messages_data},
)
except Exception:
# If insert fails due to ID conflict, update existing record
await self.sql_store.update(
table="conversation_messages",
data={"messages": messages_data},
where={"conversation_id": conversation_id},
)
await self.sql_store.upsert(
table="conversation_messages",
data={"conversation_id": conversation_id, "messages": messages_data},
conflict_columns=["conversation_id"],
update_columns=["messages"],
)
logger.debug(f"Stored {len(messages)} messages for conversation {conversation_id}")

View file

@ -47,6 +47,18 @@ class SqlStore(Protocol):
"""
pass
async def upsert(
self,
table: str,
data: Mapping[str, Any],
conflict_columns: list[str],
update_columns: list[str] | None = None,
) -> None:
"""
Insert a row and update specified columns when conflicts occur.
"""
pass
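The two call sites converted earlier in this diff illustrate both flavors of the new method: with `update_columns` omitted, every non-conflict column in `data` is rewritten on conflict (see the SQLAlchemy implementation later in this diff); when supplied, only the listed columns are updated. A condensed sketch with placeholder data:

```python
# Placeholder values standing in for the real records built by the services above;
# assumes `sql_store` is an already-initialized SqlStore implementation.
item_record = {"id": "item_1", "created_at": 0, "item_data": {}}
conversation_id = "conv_1"
messages_data = [{"role": "user", "content": "hi"}]

# Conversation items: rewrite all non-key columns when a row with the same id exists.
await sql_store.upsert(
    table="conversation_items",
    data=item_record,
    conflict_columns=["id"],
)

# Conversation messages: only the "messages" column is refreshed on conflict.
await sql_store.upsert(
    table="conversation_messages",
    data={"conversation_id": conversation_id, "messages": messages_data},
    conflict_columns=["conversation_id"],
    update_columns=["messages"],
)
```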
async def fetch_all(
self,
table: str,

View file

@ -45,8 +45,13 @@ def _enhance_item_with_access_control(item: Mapping[str, Any], current_user: Use
enhanced["owner_principal"] = current_user.principal
enhanced["access_attributes"] = current_user.attributes
else:
enhanced["owner_principal"] = None
enhanced["access_attributes"] = None
# IMPORTANT: Use an empty string for owner_principal (not None) so the row matches
# the public access filter in _get_public_access_conditions(), which expects:
# - owner_principal = '' (empty string)
# - access_attributes = JSON null (Python None serialized by the JSON column as 'null')
# A SQL NULL owner_principal would cause public rows to be filtered out on read.
enhanced["owner_principal"] = ""
enhanced["access_attributes"] = None # Pydantic/JSON will serialize this as JSON null
return enhanced
@ -124,6 +129,23 @@ class AuthorizedSqlStore:
enhanced_data = [_enhance_item_with_access_control(item, current_user) for item in data]
await self.sql_store.insert(table, enhanced_data)
async def upsert(
self,
table: str,
data: Mapping[str, Any],
conflict_columns: list[str],
update_columns: list[str] | None = None,
) -> None:
"""Upsert a row with automatic access control attribute capture."""
current_user = get_authenticated_user()
enhanced_data = _enhance_item_with_access_control(data, current_user)
await self.sql_store.upsert(
table=table,
data=enhanced_data,
conflict_columns=conflict_columns,
update_columns=update_columns,
)
async def fetch_all(
self,
table: str,
@ -188,8 +210,9 @@ class AuthorizedSqlStore:
enhanced_data["owner_principal"] = current_user.principal
enhanced_data["access_attributes"] = current_user.attributes
else:
enhanced_data["owner_principal"] = None
enhanced_data["access_attributes"] = None
# IMPORTANT: Use empty string for owner_principal to match public access filter
enhanced_data["owner_principal"] = ""
enhanced_data["access_attributes"] = None # Will serialize as JSON null
await self.sql_store.update(table, enhanced_data, where)
@ -245,14 +268,24 @@ class AuthorizedSqlStore:
raise ValueError(f"Unsupported database type: {self.database_type}")
def _get_public_access_conditions(self) -> list[str]:
"""Get the SQL conditions for public access."""
# Public records are records that have no owner_principal or access_attributes
"""Get the SQL conditions for public access.
Public records are those with:
- owner_principal = '' (empty string)
- access_attributes is either SQL NULL or JSON null
Note: Different databases serialize None differently:
- SQLite: None -> JSON null (text = 'null')
- Postgres: None -> SQL NULL (IS NULL)
"""
conditions = ["owner_principal = ''"]
if self.database_type == StorageBackendType.SQL_POSTGRES.value:
# Postgres stores JSON null as 'null'
conditions.append("access_attributes::text = 'null'")
# Accept both SQL NULL and JSON null for Postgres compatibility
# This handles both old rows (SQL NULL) and new rows (JSON null)
conditions.append("(access_attributes IS NULL OR access_attributes::text = 'null')")
elif self.database_type == StorageBackendType.SQL_SQLITE.value:
conditions.append("access_attributes = 'null'")
# SQLite serializes None as JSON null
conditions.append("(access_attributes IS NULL OR access_attributes = 'null')")
else:
raise ValueError(f"Unsupported database type: {self.database_type}")
return conditions

View file

@ -72,13 +72,14 @@ def _build_where_expr(column: ColumnElement, value: Any) -> ColumnElement:
class SqlAlchemySqlStoreImpl(SqlStore):
def __init__(self, config: SqlAlchemySqlStoreConfig):
self.config = config
self._is_sqlite_backend = "sqlite" in self.config.engine_str
self.async_session = async_sessionmaker(self.create_engine())
self.metadata = MetaData()
def create_engine(self) -> AsyncEngine:
# Configure connection args for better concurrency support
connect_args = {}
if "sqlite" in self.config.engine_str:
if self._is_sqlite_backend:
# SQLite-specific optimizations for concurrent access
# With WAL mode, most locks resolve in milliseconds, but allow up to 5s for edge cases
connect_args["timeout"] = 5.0
@@ -91,7 +92,7 @@ class SqlAlchemySqlStoreImpl(SqlStore):
)
# Enable WAL mode for SQLite to support concurrent readers and writers
if "sqlite" in self.config.engine_str:
if self._is_sqlite_backend:
@event.listens_for(engine.sync_engine, "connect")
def set_sqlite_pragma(dbapi_conn, connection_record):
@@ -151,6 +152,29 @@ class SqlAlchemySqlStoreImpl(SqlStore):
await session.execute(self.metadata.tables[table].insert(), data)
await session.commit()
async def upsert(
self,
table: str,
data: Mapping[str, Any],
conflict_columns: list[str],
update_columns: list[str] | None = None,
) -> None:
table_obj = self.metadata.tables[table]
dialect_insert = self._get_dialect_insert(table_obj)
insert_stmt = dialect_insert.values(**data)
if update_columns is None:
update_columns = [col for col in data.keys() if col not in conflict_columns]
update_mapping = {col: getattr(insert_stmt.excluded, col) for col in update_columns}
conflict_cols = [table_obj.c[col] for col in conflict_columns]
stmt = insert_stmt.on_conflict_do_update(index_elements=conflict_cols, set_=update_mapping)
async with self.async_session() as session:
await session.execute(stmt)
await session.commit()
async def fetch_all(
self,
table: str,
@@ -333,9 +357,18 @@ class SqlAlchemySqlStoreImpl(SqlStore):
add_column_sql = text(f"ALTER TABLE {table} ADD COLUMN {column_name} {compiled_type}{nullable_clause}")
await conn.execute(add_column_sql)
except Exception as e:
# If any error occurs during migration, log it but don't fail
# The table creation will handle adding the column
logger.error(f"Error adding column {column_name} to table {table}: {e}")
pass
def _get_dialect_insert(self, table: Table):
if self._is_sqlite_backend:
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
return sqlite_insert(table)
else:
from sqlalchemy.dialects.postgresql import insert as pg_insert
return pg_insert(table)
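As a rough illustration of what the dialect-specific insert compiles to, here is a standalone SQLAlchemy sketch of the same ON CONFLICT upsert for SQLite (table and column names are made up, not taken from the codebase):

from sqlalchemy import Column, MetaData, String, Table
from sqlalchemy.dialects import sqlite
from sqlalchemy.dialects.sqlite import insert as sqlite_insert

metadata = MetaData()
items = Table("items", metadata, Column("id", String, primary_key=True), Column("value", String))

insert_stmt = sqlite_insert(items).values(id="item_1", value="second")
upsert_stmt = insert_stmt.on_conflict_do_update(
    index_elements=[items.c.id],                 # conflict_columns
    set_={"value": insert_stmt.excluded.value},  # update_columns
)
# Compiles to roughly:
#   INSERT INTO items (id, value) VALUES (?, ?)
#   ON CONFLICT (id) DO UPDATE SET value = excluded.value
print(upsert_stmt.compile(dialect=sqlite.dialect()))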

View file

@@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from threading import Lock
from typing import Annotated, cast
from pydantic import Field
@@ -21,6 +22,8 @@ from .api import SqlStore
sql_store_pip_packages = ["sqlalchemy[asyncio]", "aiosqlite", "asyncpg"]
_SQLSTORE_BACKENDS: dict[str, StorageBackendConfig] = {}
_SQLSTORE_INSTANCES: dict[str, SqlStore] = {}
_SQLSTORE_LOCKS: dict[str, Lock] = {}
SqlStoreConfig = Annotated[
@@ -52,19 +55,34 @@ def sqlstore_impl(reference: SqlStoreReference) -> SqlStore:
f"Unknown SQL store backend '{backend_name}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
)
if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
existing = _SQLSTORE_INSTANCES.get(backend_name)
if existing:
return existing
config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
return SqlAlchemySqlStoreImpl(config)
else:
raise ValueError(f"Unknown sqlstore type {backend_config.type}")
lock = _SQLSTORE_LOCKS.setdefault(backend_name, Lock())
with lock:
existing = _SQLSTORE_INSTANCES.get(backend_name)
if existing:
return existing
if isinstance(backend_config, SqliteSqlStoreConfig | PostgresSqlStoreConfig):
from .sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
config = cast(SqliteSqlStoreConfig | PostgresSqlStoreConfig, backend_config).model_copy()
instance = SqlAlchemySqlStoreImpl(config)
_SQLSTORE_INSTANCES[backend_name] = instance
return instance
else:
raise ValueError(f"Unknown sqlstore type {backend_config.type}")
def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
"""Register the set of available SQL store backends for reference resolution."""
global _SQLSTORE_BACKENDS
global _SQLSTORE_INSTANCES
_SQLSTORE_BACKENDS.clear()
_SQLSTORE_INSTANCES.clear()
_SQLSTORE_LOCKS.clear()
for name, cfg in backends.items():
_SQLSTORE_BACKENDS[name] = cfg
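The per-backend instance cache above follows a check-lock-recheck pattern. A minimal standalone sketch of that idiom (names here are illustrative, not from the codebase):

from threading import Lock

_instances: dict[str, object] = {}
_locks: dict[str, Lock] = {}

def get_or_create(name: str, factory) -> object:
    existing = _instances.get(name)  # fast path outside the lock
    if existing is not None:
        return existing
    lock = _locks.setdefault(name, Lock())
    with lock:
        existing = _instances.get(name)  # re-check so concurrent callers build at most one instance
        if existing is not None:
            return existing
        instance = factory()
        _instances[name] = instance
        return instance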

View file

@@ -0,0 +1,20 @@
.git
.gitignore
.env.local
.env.*.local
.next
node_modules
npm-debug.log
*.md
.DS_Store
.vscode
.idea
playwright-report
e2e
jest.config.ts
jest.setup.ts
eslint.config.mjs
.prettierrc
.prettierignore
.nvmrc
playwright.config.ts

View file

@@ -0,0 +1,18 @@
FROM node:22.5.1-alpine
ENV NODE_ENV=production
# Install dumb-init for proper signal handling
RUN apk add --no-cache dumb-init
# Create non-root user for security
RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs
# Install llama-stack-ui from npm
RUN npm install -g llama-stack-ui
USER nextjs
ENTRYPOINT ["dumb-init", "--"]
CMD ["llama-stack-ui"]

View file

@@ -8,6 +8,9 @@ import type {
import { useRouter } from "next/navigation";
import { usePagination } from "@/hooks/use-pagination";
import { Button } from "@/components/ui/button";
import { Plus, Trash2, Search, Edit, X } from "lucide-react";
import { useState } from "react";
import { Input } from "@/components/ui/input";
import {
Table,
TableBody,
@@ -17,9 +20,21 @@ import {
TableRow,
} from "@/components/ui/table";
import { Skeleton } from "@/components/ui/skeleton";
import { useAuthClient } from "@/hooks/use-auth-client";
import {
VectorStoreEditor,
VectorStoreFormData,
} from "@/components/vector-stores/vector-store-editor";
export default function VectorStoresPage() {
const router = useRouter();
const client = useAuthClient();
const [deletingStores, setDeletingStores] = useState<Set<string>>(new Set());
const [searchTerm, setSearchTerm] = useState("");
const [showVectorStoreModal, setShowVectorStoreModal] = useState(false);
const [editingStore, setEditingStore] = useState<VectorStore | null>(null);
const [modalError, setModalError] = useState<string | null>(null);
const [showSuccessState, setShowSuccessState] = useState(false);
const {
data: stores,
status,
@@ -47,6 +62,142 @@ export default function VectorStoresPage() {
}
}, [status, hasMore, loadMore]);
// Handle ESC key to close modal
React.useEffect(() => {
const handleEscape = (event: KeyboardEvent) => {
if (event.key === "Escape" && showVectorStoreModal) {
handleCancel();
}
};
document.addEventListener("keydown", handleEscape);
return () => document.removeEventListener("keydown", handleEscape);
}, [showVectorStoreModal]);
const handleDeleteVectorStore = async (storeId: string) => {
if (
!confirm(
"Are you sure you want to delete this vector store? This action cannot be undone."
)
) {
return;
}
setDeletingStores(prev => new Set([...prev, storeId]));
try {
await client.vectorStores.delete(storeId);
// Reload the data to reflect the deletion
window.location.reload();
} catch (err: unknown) {
console.error("Failed to delete vector store:", err);
const errorMessage = err instanceof Error ? err.message : "Unknown error";
alert(`Failed to delete vector store: ${errorMessage}`);
} finally {
setDeletingStores(prev => {
const newSet = new Set(prev);
newSet.delete(storeId);
return newSet;
});
}
};
const handleSaveVectorStore = async (formData: VectorStoreFormData) => {
try {
setModalError(null);
if (editingStore) {
// Update existing vector store
const updateParams: {
name?: string;
extra_body?: Record<string, unknown>;
} = {};
// Only include fields that have changed or are provided
if (formData.name && formData.name !== editingStore.name) {
updateParams.name = formData.name;
}
// Add all parameters to extra_body (except provider_id which can't be changed)
const extraBody: Record<string, unknown> = {};
if (formData.embedding_model) {
extraBody.embedding_model = formData.embedding_model;
}
if (formData.embedding_dimension) {
extraBody.embedding_dimension = formData.embedding_dimension;
}
if (Object.keys(extraBody).length > 0) {
updateParams.extra_body = extraBody;
}
await client.vectorStores.update(editingStore.id, updateParams);
// Show success state with close button
setShowSuccessState(true);
setModalError(
"✅ Vector store updated successfully! You can close this modal and refresh the page to see changes."
);
return;
}
const createParams: {
name?: string;
provider_id?: string;
extra_body?: Record<string, unknown>;
} = {
name: formData.name || undefined,
};
// Extract provider_id to top-level (like Python client does)
if (formData.provider_id) {
createParams.provider_id = formData.provider_id;
}
// Add remaining parameters to extra_body
const extraBody: Record<string, unknown> = {};
if (formData.provider_id) {
extraBody.provider_id = formData.provider_id;
}
if (formData.embedding_model) {
extraBody.embedding_model = formData.embedding_model;
}
if (formData.embedding_dimension) {
extraBody.embedding_dimension = formData.embedding_dimension;
}
if (Object.keys(extraBody).length > 0) {
createParams.extra_body = extraBody;
}
await client.vectorStores.create(createParams);
// Show success state with close button
setShowSuccessState(true);
setModalError(
"✅ Vector store created successfully! You can close this modal and refresh the page to see changes."
);
} catch (err: unknown) {
console.error("Failed to create vector store:", err);
const errorMessage =
err instanceof Error ? err.message : "Failed to create vector store";
setModalError(errorMessage);
}
};
const handleEditVectorStore = (store: VectorStore) => {
setEditingStore(store);
setShowVectorStoreModal(true);
setModalError(null);
};
const handleCancel = () => {
setShowVectorStoreModal(false);
setEditingStore(null);
setModalError(null);
setShowSuccessState(false);
};
const renderContent = () => {
if (status === "loading") {
return (
@@ -66,73 +217,190 @@ export default function VectorStoresPage() {
return <p>No vector stores found.</p>;
}
return (
<div className="overflow-auto flex-1 min-h-0">
<Table>
<TableHeader>
<TableRow>
<TableHead>ID</TableHead>
<TableHead>Name</TableHead>
<TableHead>Created</TableHead>
<TableHead>Completed</TableHead>
<TableHead>Cancelled</TableHead>
<TableHead>Failed</TableHead>
<TableHead>In Progress</TableHead>
<TableHead>Total</TableHead>
<TableHead>Usage Bytes</TableHead>
<TableHead>Provider ID</TableHead>
<TableHead>Provider Vector DB ID</TableHead>
</TableRow>
</TableHeader>
<TableBody>
{stores.map(store => {
const fileCounts = store.file_counts;
const metadata = store.metadata || {};
const providerId = metadata.provider_id ?? "";
const providerDbId = metadata.provider_vector_db_id ?? "";
// Filter stores based on search term
const filteredStores = stores.filter(store => {
if (!searchTerm) return true;
return (
<TableRow
key={store.id}
onClick={() => router.push(`/logs/vector-stores/${store.id}`)}
className="cursor-pointer hover:bg-muted/50"
>
<TableCell>
<Button
variant="link"
className="p-0 h-auto font-mono text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300"
onClick={() =>
router.push(`/logs/vector-stores/${store.id}`)
}
>
{store.id}
</Button>
</TableCell>
<TableCell>{store.name}</TableCell>
<TableCell>
{new Date(store.created_at * 1000).toLocaleString()}
</TableCell>
<TableCell>{fileCounts.completed}</TableCell>
<TableCell>{fileCounts.cancelled}</TableCell>
<TableCell>{fileCounts.failed}</TableCell>
<TableCell>{fileCounts.in_progress}</TableCell>
<TableCell>{fileCounts.total}</TableCell>
<TableCell>{store.usage_bytes}</TableCell>
<TableCell>{providerId}</TableCell>
<TableCell>{providerDbId}</TableCell>
</TableRow>
);
})}
</TableBody>
</Table>
const searchLower = searchTerm.toLowerCase();
return (
store.id.toLowerCase().includes(searchLower) ||
(store.name && store.name.toLowerCase().includes(searchLower)) ||
(store.metadata?.provider_id &&
String(store.metadata.provider_id)
.toLowerCase()
.includes(searchLower)) ||
(store.metadata?.provider_vector_db_id &&
String(store.metadata.provider_vector_db_id)
.toLowerCase()
.includes(searchLower))
);
});
return (
<div className="space-y-4">
{/* Search Bar */}
<div className="relative flex-1 max-w-md">
<Search className="absolute left-3 top-1/2 transform -translate-y-1/2 text-muted-foreground h-4 w-4" />
<Input
placeholder="Search vector stores..."
value={searchTerm}
onChange={e => setSearchTerm(e.target.value)}
className="pl-10"
/>
</div>
<div className="overflow-auto flex-1 min-h-0">
<Table>
<TableHeader>
<TableRow>
<TableHead>ID</TableHead>
<TableHead>Name</TableHead>
<TableHead>Created</TableHead>
<TableHead>Completed</TableHead>
<TableHead>Cancelled</TableHead>
<TableHead>Failed</TableHead>
<TableHead>In Progress</TableHead>
<TableHead>Total</TableHead>
<TableHead>Usage Bytes</TableHead>
<TableHead>Provider ID</TableHead>
<TableHead>Provider Vector DB ID</TableHead>
<TableHead>Actions</TableHead>
</TableRow>
</TableHeader>
<TableBody>
{filteredStores.map(store => {
const fileCounts = store.file_counts;
const metadata = store.metadata || {};
const providerId = metadata.provider_id ?? "";
const providerDbId = metadata.provider_vector_db_id ?? "";
return (
<TableRow
key={store.id}
onClick={() =>
router.push(`/logs/vector-stores/${store.id}`)
}
className="cursor-pointer hover:bg-muted/50"
>
<TableCell>
<Button
variant="link"
className="p-0 h-auto font-mono text-blue-600 hover:text-blue-800 dark:text-blue-400 dark:hover:text-blue-300"
onClick={() =>
router.push(`/logs/vector-stores/${store.id}`)
}
>
{store.id}
</Button>
</TableCell>
<TableCell>{store.name}</TableCell>
<TableCell>
{new Date(store.created_at * 1000).toLocaleString()}
</TableCell>
<TableCell>{fileCounts.completed}</TableCell>
<TableCell>{fileCounts.cancelled}</TableCell>
<TableCell>{fileCounts.failed}</TableCell>
<TableCell>{fileCounts.in_progress}</TableCell>
<TableCell>{fileCounts.total}</TableCell>
<TableCell>{store.usage_bytes}</TableCell>
<TableCell>{providerId}</TableCell>
<TableCell>{providerDbId}</TableCell>
<TableCell>
<div className="flex gap-2">
<Button
variant="outline"
size="sm"
onClick={e => {
e.stopPropagation();
handleEditVectorStore(store);
}}
>
<Edit className="h-4 w-4" />
</Button>
<Button
variant="outline"
size="sm"
onClick={e => {
e.stopPropagation();
handleDeleteVectorStore(store.id);
}}
disabled={deletingStores.has(store.id)}
>
{deletingStores.has(store.id) ? (
"Deleting..."
) : (
<>
<Trash2 className="h-4 w-4" />
</>
)}
</Button>
</div>
</TableCell>
</TableRow>
);
})}
</TableBody>
</Table>
</div>
</div>
);
};
return (
<div className="space-y-4">
<h1 className="text-2xl font-semibold">Vector Stores</h1>
<div className="flex items-center justify-between">
<h1 className="text-2xl font-semibold">Vector Stores</h1>
<Button
onClick={() => setShowVectorStoreModal(true)}
disabled={status === "loading"}
>
<Plus className="h-4 w-4 mr-2" />
New Vector Store
</Button>
</div>
{renderContent()}
{/* Create Vector Store Modal */}
{showVectorStoreModal && (
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
<div className="bg-background border rounded-lg shadow-lg max-w-2xl w-full mx-4 max-h-[90vh] overflow-hidden">
<div className="p-6 border-b flex items-center justify-between">
<h2 className="text-2xl font-bold">
{editingStore ? "Edit Vector Store" : "Create New Vector Store"}
</h2>
<Button
variant="ghost"
size="sm"
onClick={handleCancel}
className="p-1 h-auto"
>
<X className="h-4 w-4" />
</Button>
</div>
<div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
<VectorStoreEditor
onSave={handleSaveVectorStore}
onCancel={handleCancel}
error={modalError}
showSuccessState={showSuccessState}
isEditing={!!editingStore}
initialData={
editingStore
? {
name: editingStore.name || "",
embedding_model:
editingStore.metadata?.embedding_model || "",
embedding_dimension:
editingStore.metadata?.embedding_dimension || 768,
provider_id: editingStore.metadata?.provider_id || "",
}
: undefined
}
/>
</div>
</div>
</div>
)}
</div>
);
}

34
src/llama_stack_ui/bin/cli.js Executable file
View file

@@ -0,0 +1,34 @@
#!/usr/bin/env node
const { spawn } = require('child_process');
const path = require('path');
const port = process.env.LLAMA_STACK_UI_PORT || 8322;
const uiDir = path.resolve(__dirname, '..');
const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js');
const serverDir = path.dirname(serverPath);
console.log(`Starting Llama Stack UI on http://localhost:${port}`);
const child = spawn(process.execPath, [serverPath], {
cwd: serverDir,
stdio: 'inherit',
env: {
...process.env,
PORT: port,
},
});
process.on('SIGINT', () => {
child.kill('SIGINT');
process.exit(0);
});
process.on('SIGTERM', () => {
child.kill('SIGTERM');
process.exit(0);
});
child.on('exit', (code) => {
process.exit(code);
});

View file

@@ -2,7 +2,7 @@ import React from "react";
import { render, screen, fireEvent } from "@testing-library/react";
import "@testing-library/jest-dom";
import { PromptEditor } from "./prompt-editor";
import type { Prompt, PromptFormData } from "./types";
import type { Prompt } from "./types";
describe("PromptEditor", () => {
const mockOnSave = jest.fn();

View file

@@ -12,6 +12,20 @@ jest.mock("next/navigation", () => ({
}),
}));
// Mock NextAuth
jest.mock("next-auth/react", () => ({
useSession: () => ({
data: {
accessToken: "mock-access-token",
user: {
id: "mock-user-id",
email: "test@example.com",
},
},
status: "authenticated",
}),
}));
describe("VectorStoreDetailView", () => {
const defaultProps = {
store: null,

View file

@@ -1,16 +1,18 @@
"use client";
import { useRouter } from "next/navigation";
import { useState, useEffect } from "react";
import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
import { Skeleton } from "@/components/ui/skeleton";
import { Button } from "@/components/ui/button";
import { useAuthClient } from "@/hooks/use-auth-client";
import { Edit2, Trash2, X } from "lucide-react";
import {
DetailLoadingView,
DetailErrorView,
DetailNotFoundView,
DetailLayout,
PropertiesCard,
PropertyItem,
} from "@/components/layout/detail-layout";
@@ -23,6 +25,7 @@ import {
TableHeader,
TableRow,
} from "@/components/ui/table";
import { VectorStoreEditor, VectorStoreFormData } from "./vector-store-editor";
interface VectorStoreDetailViewProps {
store: VectorStore | null;
@@ -43,21 +46,122 @@ export function VectorStoreDetailView({
errorFiles,
id,
}: VectorStoreDetailViewProps) {
const title = "Vector Store Details";
const router = useRouter();
const client = useAuthClient();
const [isDeleting, setIsDeleting] = useState(false);
const [showEditModal, setShowEditModal] = useState(false);
const [modalError, setModalError] = useState<string | null>(null);
const [showSuccessState, setShowSuccessState] = useState(false);
// Handle ESC key to close modal
useEffect(() => {
const handleEscape = (event: KeyboardEvent) => {
if (event.key === "Escape" && showEditModal) {
handleCancel();
}
};
document.addEventListener("keydown", handleEscape);
return () => document.removeEventListener("keydown", handleEscape);
}, [showEditModal]);
const handleFileClick = (fileId: string) => {
router.push(`/logs/vector-stores/${id}/files/${fileId}`);
};
const handleEditVectorStore = () => {
setShowEditModal(true);
setModalError(null);
setShowSuccessState(false);
};
const handleCancel = () => {
setShowEditModal(false);
setModalError(null);
setShowSuccessState(false);
};
const handleSaveVectorStore = async (formData: VectorStoreFormData) => {
try {
setModalError(null);
// Update existing vector store (same logic as list page)
const updateParams: {
name?: string;
extra_body?: Record<string, unknown>;
} = {};
// Only include fields that have changed or are provided
if (formData.name && formData.name !== store?.name) {
updateParams.name = formData.name;
}
// Add all parameters to extra_body (except provider_id which can't be changed)
const extraBody: Record<string, unknown> = {};
if (formData.embedding_model) {
extraBody.embedding_model = formData.embedding_model;
}
if (formData.embedding_dimension) {
extraBody.embedding_dimension = formData.embedding_dimension;
}
if (Object.keys(extraBody).length > 0) {
updateParams.extra_body = extraBody;
}
await client.vectorStores.update(id, updateParams);
// Show success state
setShowSuccessState(true);
setModalError(
"✅ Vector store updated successfully! You can close this modal and refresh the page to see changes."
);
} catch (err: unknown) {
console.error("Failed to update vector store:", err);
const errorMessage =
err instanceof Error ? err.message : "Failed to update vector store";
setModalError(errorMessage);
}
};
const handleDeleteVectorStore = async () => {
if (
!confirm(
"Are you sure you want to delete this vector store? This action cannot be undone."
)
) {
return;
}
setIsDeleting(true);
try {
await client.vectorStores.delete(id);
// Redirect to the vector stores list after successful deletion
router.push("/logs/vector-stores");
} catch (err: unknown) {
console.error("Failed to delete vector store:", err);
const errorMessage = err instanceof Error ? err.message : "Unknown error";
alert(`Failed to delete vector store: ${errorMessage}`);
} finally {
setIsDeleting(false);
}
};
if (errorStore) {
return <DetailErrorView title={title} id={id} error={errorStore} />;
return (
<DetailErrorView
title="Vector Store Details"
id={id}
error={errorStore}
/>
);
}
if (isLoadingStore) {
return <DetailLoadingView title={title} />;
return <DetailLoadingView />;
}
if (!store) {
return <DetailNotFoundView title={title} id={id} />;
return <DetailNotFoundView title="Vector Store Details" id={id} />;
}
const mainContent = (
@@ -138,6 +242,73 @@ export function VectorStoreDetailView({
);
return (
<DetailLayout title={title} mainContent={mainContent} sidebar={sidebar} />
<>
<div className="flex items-center justify-between mb-6">
<h1 className="text-2xl font-bold">Vector Store Details</h1>
<div className="flex gap-2">
<Button
variant="outline"
onClick={handleEditVectorStore}
disabled={isDeleting}
>
<Edit2 className="h-4 w-4 mr-2" />
Edit
</Button>
<Button
variant="destructive"
onClick={handleDeleteVectorStore}
disabled={isDeleting}
>
{isDeleting ? (
"Deleting..."
) : (
<>
<Trash2 className="h-4 w-4 mr-2" />
Delete
</>
)}
</Button>
</div>
</div>
<div className="flex flex-col md:flex-row gap-6">
<div className="flex-grow md:w-2/3 space-y-6">{mainContent}</div>
<div className="md:w-1/3">{sidebar}</div>
</div>
{/* Edit Vector Store Modal */}
{showEditModal && (
<div className="fixed inset-0 bg-black/50 flex items-center justify-center z-50">
<div className="bg-background border rounded-lg shadow-lg max-w-2xl w-full mx-4 max-h-[90vh] overflow-hidden">
<div className="p-6 border-b flex items-center justify-between">
<h2 className="text-2xl font-bold">Edit Vector Store</h2>
<Button
variant="ghost"
size="sm"
onClick={handleCancel}
className="p-1 h-auto"
>
<X className="h-4 w-4" />
</Button>
</div>
<div className="p-6 overflow-y-auto max-h-[calc(90vh-120px)]">
<VectorStoreEditor
onSave={handleSaveVectorStore}
onCancel={handleCancel}
error={modalError}
showSuccessState={showSuccessState}
isEditing={true}
initialData={{
name: store?.name || "",
embedding_model: store?.metadata?.embedding_model || "",
embedding_dimension:
store?.metadata?.embedding_dimension || 768,
provider_id: store?.metadata?.provider_id || "",
}}
/>
</div>
</div>
</div>
)}
</>
);
}

View file

@@ -0,0 +1,235 @@
"use client";
import { useState, useEffect } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Card, CardContent } from "@/components/ui/card";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { useAuthClient } from "@/hooks/use-auth-client";
import type { Model } from "llama-stack-client/resources/models";
export interface VectorStoreFormData {
name: string;
embedding_model?: string;
embedding_dimension?: number;
provider_id?: string;
}
interface VectorStoreEditorProps {
onSave: (formData: VectorStoreFormData) => Promise<void>;
onCancel: () => void;
error?: string | null;
initialData?: VectorStoreFormData;
showSuccessState?: boolean;
isEditing?: boolean;
}
export function VectorStoreEditor({
onSave,
onCancel,
error,
initialData,
showSuccessState,
isEditing = false,
}: VectorStoreEditorProps) {
const client = useAuthClient();
const [formData, setFormData] = useState<VectorStoreFormData>(
initialData || {
name: "",
embedding_model: "",
embedding_dimension: 768,
provider_id: "",
}
);
const [loading, setLoading] = useState(false);
const [models, setModels] = useState<Model[]>([]);
const [modelsLoading, setModelsLoading] = useState(true);
const [modelsError, setModelsError] = useState<string | null>(null);
const embeddingModels = models.filter(
model => model.custom_metadata?.model_type === "embedding"
);
useEffect(() => {
const fetchModels = async () => {
try {
setModelsLoading(true);
setModelsError(null);
const modelList = await client.models.list();
setModels(modelList);
// Set default embedding model if available
const embeddingModelsList = modelList.filter(model => {
return model.custom_metadata?.model_type === "embedding";
});
if (embeddingModelsList.length > 0 && !formData.embedding_model) {
setFormData(prev => ({
...prev,
embedding_model: embeddingModelsList[0].id,
}));
}
} catch (err) {
console.error("Failed to load models:", err);
setModelsError(
err instanceof Error ? err.message : "Failed to load models"
);
} finally {
setModelsLoading(false);
}
};
fetchModels();
}, [client]);
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault();
setLoading(true);
try {
await onSave(formData);
} finally {
setLoading(false);
}
};
return (
<Card>
<CardContent className="pt-6">
<form onSubmit={handleSubmit} className="space-y-4">
<div className="space-y-2">
<Label htmlFor="name">Name</Label>
<Input
id="name"
value={formData.name}
onChange={e => setFormData({ ...formData, name: e.target.value })}
placeholder="Enter vector store name"
required
/>
</div>
<div className="space-y-2">
<Label htmlFor="embedding_model">Embedding Model (Optional)</Label>
{modelsLoading ? (
<div className="text-sm text-muted-foreground">
Loading models... ({models.length} loaded)
</div>
) : modelsError ? (
<div className="text-sm text-destructive">
Error: {modelsError}
</div>
) : embeddingModels.length === 0 ? (
<div className="text-sm text-muted-foreground">
No embedding models available ({models.length} total models)
</div>
) : (
<Select
value={formData.embedding_model}
onValueChange={value =>
setFormData({ ...formData, embedding_model: value })
}
>
<SelectTrigger>
<SelectValue placeholder="Select Embedding Model" />
</SelectTrigger>
<SelectContent>
{embeddingModels.map((model, index) => (
<SelectItem key={model.id} value={model.id}>
{model.id}
</SelectItem>
))}
</SelectContent>
</Select>
)}
{formData.embedding_model && (
<p className="text-xs text-muted-foreground mt-1">
Dimension:{" "}
{embeddingModels.find(m => m.id === formData.embedding_model)
?.custom_metadata?.embedding_dimension || "Unknown"}
</p>
)}
</div>
<div className="space-y-2">
<Label htmlFor="embedding_dimension">
Embedding Dimension (Optional)
</Label>
<Input
id="embedding_dimension"
type="number"
value={formData.embedding_dimension}
onChange={e =>
setFormData({
...formData,
embedding_dimension: parseInt(e.target.value) || 768,
})
}
placeholder="768"
/>
</div>
<div className="space-y-2">
<Label htmlFor="provider_id">
Provider ID {isEditing ? "(Read-only)" : "(Optional)"}
</Label>
<Input
id="provider_id"
value={formData.provider_id}
onChange={e =>
setFormData({ ...formData, provider_id: e.target.value })
}
placeholder="e.g., faiss, chroma, sqlite"
disabled={isEditing}
/>
{isEditing && (
<p className="text-xs text-muted-foreground">
Provider ID cannot be changed after vector store creation
</p>
)}
</div>
{error && (
<div
className={`text-sm p-3 rounded ${
error.startsWith("✅")
? "text-green-700 bg-green-50 border border-green-200"
: "text-destructive bg-destructive/10"
}`}
>
{error}
</div>
)}
<div className="flex gap-2 pt-4">
{showSuccessState ? (
<Button type="button" onClick={onCancel}>
Close
</Button>
) : (
<>
<Button type="submit" disabled={loading}>
{loading
? initialData
? "Updating..."
: "Creating..."
: initialData
? "Update Vector Store"
: "Create Vector Store"}
</Button>
<Button type="button" variant="outline" onClick={onCancel}>
Cancel
</Button>
</>
)}
</div>
</form>
</CardContent>
</Card>
);
}

View file

@@ -34,9 +34,35 @@ export class ContentsAPI {
async getFileContents(
vectorStoreId: string,
fileId: string
fileId: string,
includeEmbeddings: boolean = true,
includeMetadata: boolean = true
): Promise<VectorStoreContentsResponse> {
return this.client.vectorStores.files.content(vectorStoreId, fileId);
try {
// Use query parameters to pass embeddings and metadata flags (OpenAI-compatible pattern)
const extraQuery: Record<string, boolean> = {};
if (includeEmbeddings) {
extraQuery.include_embeddings = true;
}
if (includeMetadata) {
extraQuery.include_metadata = true;
}
const result = await this.client.vectorStores.files.content(
vectorStoreId,
fileId,
{
query: {
include_embeddings: includeEmbeddings,
include_metadata: includeMetadata,
},
}
);
return result;
} catch (error) {
console.error("ContentsAPI.getFileContents error:", error);
throw error;
}
}
async getContent(
@@ -70,11 +96,15 @@ export class ContentsAPI {
order?: string;
after?: string;
before?: string;
includeEmbeddings?: boolean;
includeMetadata?: boolean;
}
): Promise<VectorStoreListContentsResponse> {
const fileContents = await this.client.vectorStores.files.content(
const fileContents = await this.getFileContents(
vectorStoreId,
fileId
fileId,
options?.includeEmbeddings ?? true,
options?.includeMetadata ?? true
);
const contentItems: VectorStoreContentItem[] = [];
@@ -82,7 +112,7 @@ export class ContentsAPI {
const rawContent = content as Record<string, unknown>;
// Extract actual fields from the API response
const embedding = rawContent.embedding || undefined;
const embedding = rawContent.embedding as number[] | undefined;
const created_timestamp =
rawContent.created_timestamp ||
rawContent.created_at ||

View file

@@ -1,7 +1,13 @@
import type { NextConfig } from "next";
const nextConfig: NextConfig = {
/* config options here */
typescript: {
ignoreBuildErrors: true,
},
output: "standalone",
images: {
unoptimized: true,
},
};
export default nextConfig;

View file

@@ -1,12 +1,13 @@
{
"name": "ui",
"version": "0.1.0",
"name": "llama-stack-ui",
"version": "0.4.0-alpha.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "ui",
"version": "0.1.0",
"name": "llama-stack-ui",
"version": "0.4.0-alpha.1",
"license": "MIT",
"dependencies": {
"@radix-ui/react-collapsible": "^1.1.12",
"@radix-ui/react-dialog": "^1.1.15",
@@ -20,7 +21,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.24",
"llama-stack-client": "github:llamastack/llama-stack-client-typescript",
"llama-stack-client": "^0.3.1",
"lucide-react": "^0.545.0",
"next": "15.5.4",
"next-auth": "^4.24.11",
@@ -9684,8 +9685,9 @@
"license": "MIT"
},
"node_modules/llama-stack-client": {
"version": "0.4.0-alpha.1",
"resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f",
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz",
"integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==",
"license": "MIT",
"dependencies": {
"@types/node": "^18.11.18",

View file

@@ -1,11 +1,31 @@
{
"name": "ui",
"version": "0.1.0",
"private": true,
"name": "llama-stack-ui",
"version": "0.4.0-alpha.4",
"description": "Web UI for Llama Stack",
"license": "MIT",
"author": "Llama Stack <llamastack@meta.com>",
"repository": {
"type": "git",
"url": "https://github.com/llamastack/llama-stack.git",
"directory": "llama_stack_ui"
},
"bin": {
"llama-stack-ui": "bin/cli.js"
},
"files": [
"bin",
".next",
"public",
"next.config.ts",
"instrumentation.ts",
"tsconfig.json",
"package.json"
],
"scripts": {
"dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}",
"build": "next build",
"build": "next build && node scripts/postbuild.js",
"start": "next start",
"prepublishOnly": "npm run build",
"lint": "next lint",
"format": "prettier --write \"./**/*.{ts,tsx}\"",
"format:check": "prettier --check \"./**/*.{ts,tsx}\"",
@@ -25,7 +45,7 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"framer-motion": "^12.23.24",
"llama-stack-client": "github:llamastack/llama-stack-client-typescript",
"llama-stack-client": "^0.3.1",
"lucide-react": "^0.545.0",
"next": "15.5.4",
"next-auth": "^4.24.11",

View file

@@ -0,0 +1,40 @@
const fs = require('fs');
const path = require('path');
// Copy public directory to standalone
const publicSrc = path.join(__dirname, '..', 'public');
const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public');
if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) {
console.log('Copying public directory to standalone...');
copyDir(publicSrc, publicDest);
}
// Copy .next/static to standalone
const staticSrc = path.join(__dirname, '..', '.next', 'static');
const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static');
if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) {
console.log('Copying .next/static to standalone...');
copyDir(staticSrc, staticDest);
}
function copyDir(src, dest) {
if (!fs.existsSync(dest)) {
fs.mkdirSync(dest, { recursive: true });
}
const files = fs.readdirSync(src);
files.forEach((file) => {
const srcFile = path.join(src, file);
const destFile = path.join(dest, file);
if (fs.statSync(srcFile).isDirectory()) {
copyDir(srcFile, destFile);
} else {
fs.copyFileSync(srcFile, destFile);
}
});
}
console.log('Postbuild complete!');

View file

@@ -1,6 +1,7 @@
{
"default": [
{"suite": "base", "setup": "ollama"},
{"suite": "base", "setup": "ollama-postgres", "allowed_clients": ["server"], "stack_config": "server:ci-tests::run-with-postgres-store.yaml"},
{"suite": "vision", "setup": "ollama-vision"},
{"suite": "responses", "setup": "gpt"},
{"suite": "base-vllm-subset", "setup": "vllm"}

View file

@@ -233,10 +233,21 @@ def instantiate_llama_stack_client(session):
raise ValueError("You must specify either --stack-config or LLAMA_STACK_CONFIG")
# Handle server:<config_name> format or server:<config_name>:<port>
# Also handles server:<distro>::<run_file.yaml> format
if config.startswith("server:"):
parts = config.split(":")
config_name = parts[1]
port = int(parts[2]) if len(parts) > 2 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
# Strip the "server:" prefix first
config_part = config[7:] # len("server:") == 7
# Check for :: (distro::runfile format)
if "::" in config_part:
config_name = config_part
port = int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
else:
# Single colon format: either <name> or <name>:<port>
parts = config_part.split(":")
config_name = parts[0]
port = int(parts[1]) if len(parts) > 1 else int(os.environ.get("LLAMA_STACK_PORT", DEFAULT_PORT))
base_url = f"http://localhost:{port}"
force_restart = os.environ.get("LLAMA_STACK_TEST_FORCE_SERVER_RESTART") == "1"
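For illustration, a quick sketch of how the three accepted --stack-config forms parse under the logic above (ports shown assume LLAMA_STACK_PORT is unset, so the DEFAULT_PORT fallback applies):

# "server:ci-tests"                                -> config_name="ci-tests", port=DEFAULT_PORT
# "server:ci-tests:8080"                           -> config_name="ci-tests", port=8080
# "server:ci-tests::run-with-postgres-store.yaml"  -> config_name="ci-tests::run-with-postgres-store.yaml", port=DEFAULT_PORT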
@@ -323,7 +334,13 @@ def require_server(llama_stack_client):
@pytest.fixture(scope="session")
def openai_client(llama_stack_client, require_server):
base_url = f"{llama_stack_client.base_url}/v1"
return OpenAI(base_url=base_url, api_key="fake")
client = OpenAI(base_url=base_url, api_key="fake", max_retries=0, timeout=30.0)
yield client
# Cleanup: close HTTP connections
try:
client.close()
except Exception:
pass
@pytest.fixture(params=["openai_client", "client_with_models"])

View file

@@ -2,6 +2,10 @@
This directory contains recorded inference API responses used for deterministic testing without requiring live API access.
For more information, see the
[docs](https://llamastack.github.io/docs/contributing/testing/record-replay).
This README provides more technical information.
## Structure
- `responses/` - JSON files containing request/response pairs for inference operations

View file

@@ -115,7 +115,15 @@ def openai_client(base_url, api_key, provider):
client = LlamaStackAsLibraryClient(config, skip_logger_removal=True)
return client
return OpenAI(
client = OpenAI(
base_url=base_url,
api_key=api_key,
max_retries=0,
timeout=30.0,
)
yield client
# Cleanup: close HTTP connections
try:
client.close()
except Exception:
pass

View file

@@ -65,8 +65,14 @@ class TestConversationResponses:
conversation_items = openai_client.conversations.items.list(conversation.id)
assert len(conversation_items.data) >= 4 # 2 user + 2 assistant messages
@pytest.mark.timeout(60, method="thread")
def test_conversation_context_loading(self, openai_client, text_model_id):
"""Test that conversation context is properly loaded for responses."""
"""Test that conversation context is properly loaded for responses.
Note: 60s timeout added due to CI-specific deadlock in pytest/OpenAI client/httpx
after running 25+ tests. Hangs before first HTTP request is made. Works fine locally.
Investigation needed: connection pool exhaustion or event loop state issue.
"""
conversation = openai_client.conversations.create(
items=[
{"type": "message", "role": "user", "content": "My name is Alice. I like to eat apples."},

View file

@@ -71,6 +71,26 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
"embedding_model": "ollama/nomic-embed-text:v1.5",
},
),
"ollama-postgres": Setup(
name="ollama-postgres",
description="Server-mode tests with Postgres-backed persistence",
env={
"OLLAMA_URL": "http://0.0.0.0:11434",
"SAFETY_MODEL": "ollama/llama-guard3:1b",
"POSTGRES_HOST": "127.0.0.1",
"POSTGRES_PORT": "5432",
"POSTGRES_DB": "llamastack",
"POSTGRES_USER": "llamastack",
"POSTGRES_PASSWORD": "llamastack",
"LLAMA_STACK_LOGGING": "openai_responses=info",
},
defaults={
"text_model": "ollama/llama3.2:3b-instruct-fp16",
"embedding_model": "sentence-transformers/nomic-embed-text-v1.5",
"safety_model": "ollama/llama-guard3:1b",
"safety_shield": "llama-guard",
},
),
"vllm": Setup(
name="vllm",
description="vLLM provider with a text model",

View file

@@ -11,6 +11,7 @@ import pytest
from llama_stack_client import BadRequestError
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.apis.files import ExpiresAfter
from llama_stack.apis.vector_io import Chunk
from llama_stack.core.library_client import LlamaStackAsLibraryClient
from llama_stack.log import get_logger
@@ -1604,3 +1605,97 @@ def test_openai_vector_store_embedding_config_from_metadata(
assert "metadata_config_store" in store_names
assert "consistent_config_store" in store_names
@vector_provider_wrapper
def test_openai_vector_store_file_contents_with_extra_query(
compat_client_with_empty_stores, client_with_models, embedding_model_id, embedding_dimension, vector_io_provider_id
):
"""Test that vector store file contents endpoint supports extra_query parameter."""
skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
compat_client = compat_client_with_empty_stores
# Create a vector store
vector_store = compat_client.vector_stores.create(
name="test_extra_query_store",
extra_body={
"embedding_model": embedding_model_id,
"provider_id": vector_io_provider_id,
},
)
# Create and attach a file
test_content = b"This is test content for extra_query validation."
with BytesIO(test_content) as file_buffer:
file_buffer.name = "test_extra_query.txt"
file = compat_client.files.create(
file=file_buffer,
purpose="assistants",
expires_after=ExpiresAfter(anchor="created_at", seconds=86400),
)
file_attach_response = compat_client.vector_stores.files.create(
vector_store_id=vector_store.id,
file_id=file.id,
extra_body={"embedding_model": embedding_model_id},
)
assert file_attach_response.status == "completed"
# Wait for processing
time.sleep(2)
# Test that extra_query parameter is accepted and processed
content_with_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
extra_query={"include_embeddings": True, "include_metadata": True},
)
# Test without extra_query for comparison
content_without_extra_query = compat_client.vector_stores.files.content(
vector_store_id=vector_store.id,
file_id=file.id,
)
# Validate that both calls succeed
assert content_with_extra_query is not None
assert content_without_extra_query is not None
assert len(content_with_extra_query.data) > 0
assert len(content_without_extra_query.data) > 0
# Validate that extra_query parameter is processed correctly
# Both should have the embedding/metadata fields available (may be None based on flags)
first_chunk_with_flags = content_with_extra_query.data[0]
first_chunk_without_flags = content_without_extra_query.data[0]
# The key validation: extra_query fields are present in the response
# Handle both dict and object responses (different clients may return different formats)
def has_field(obj, field):
if isinstance(obj, dict):
return field in obj
else:
return hasattr(obj, field)
# Validate that all expected fields are present in both responses
expected_fields = ["embedding", "chunk_metadata", "metadata", "text"]
for field in expected_fields:
assert has_field(first_chunk_with_flags, field), f"Field '{field}' missing from response with extra_query"
assert has_field(first_chunk_without_flags, field), f"Field '{field}' missing from response without extra_query"
# Validate content is the same
def get_field(obj, field):
if isinstance(obj, dict):
return obj[field]
else:
return getattr(obj, field)
assert get_field(first_chunk_with_flags, "text") == test_content.decode("utf-8")
assert get_field(first_chunk_without_flags, "text") == test_content.decode("utf-8")
with_flags_embedding = get_field(first_chunk_with_flags, "embedding")
without_flags_embedding = get_field(first_chunk_without_flags, "embedding")
# Validate that embeddings are included when requested and excluded when not requested
assert with_flags_embedding is not None, "Embeddings should be included when include_embeddings=True"
assert len(with_flags_embedding) > 0, "Embedding should be a non-empty list"
assert without_flags_embedding is None, "Embeddings should not be included when include_embeddings=False"

View file

@@ -55,3 +55,65 @@ async def test_create_vector_stores_multiple_providers_missing_provider_id_error
with pytest.raises(ValueError, match="Multiple vector_io providers available"):
await router.openai_create_vector_store(request)
async def test_update_vector_store_provider_id_change_fails():
"""Test that updating a vector store with a different provider_id fails with clear error."""
mock_routing_table = Mock()
# Mock an existing vector store with provider_id "faiss"
mock_existing_store = Mock()
mock_existing_store.provider_id = "inline::faiss"
mock_existing_store.identifier = "vs_123"
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
mock_routing_table.get_provider_impl = AsyncMock(
return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
)
router = VectorIORouter(mock_routing_table)
# Try to update with different provider_id in metadata - this should fail
with pytest.raises(ValueError, match="provider_id cannot be changed after vector store creation"):
await router.openai_update_vector_store(
vector_store_id="vs_123",
name="updated_name",
metadata={"provider_id": "inline::sqlite"}, # Different provider_id
)
# Verify the existing store was looked up to check provider_id
mock_routing_table.get_object_by_identifier.assert_called_once_with("vector_store", "vs_123")
# Provider should not be called since validation failed
mock_routing_table.get_provider_impl.assert_not_called()
async def test_update_vector_store_same_provider_id_succeeds():
"""Test that updating a vector store with the same provider_id succeeds."""
mock_routing_table = Mock()
# Mock an existing vector store with provider_id "faiss"
mock_existing_store = Mock()
mock_existing_store.provider_id = "inline::faiss"
mock_existing_store.identifier = "vs_123"
mock_routing_table.get_object_by_identifier = AsyncMock(return_value=mock_existing_store)
mock_routing_table.get_provider_impl = AsyncMock(
return_value=Mock(openai_update_vector_store=AsyncMock(return_value=Mock(id="vs_123")))
)
router = VectorIORouter(mock_routing_table)
# Update with same provider_id should succeed
await router.openai_update_vector_store(
vector_store_id="vs_123",
name="updated_name",
metadata={"provider_id": "inline::faiss"}, # Same provider_id
)
# Verify the provider update method was called
mock_routing_table.get_provider_impl.assert_called_once_with("vs_123")
provider = await mock_routing_table.get_provider_impl("vs_123")
provider.openai_update_vector_store.assert_called_once_with(
vector_store_id="vs_123", name="updated_name", expires_after=None, metadata={"provider_id": "inline::faiss"}
)

View file

@@ -104,12 +104,18 @@ async def test_paginated_response_url_setting():
route_handler = create_dynamic_typed_route(mock_api_method, "get", "/test/route")
# Mock minimal request
# Mock minimal request with proper state object
request = MagicMock()
request.scope = {"user_attributes": {}, "principal": ""}
request.headers = {}
request.body = AsyncMock(return_value=b"")
# Create a simple state object without auto-generating attributes
class MockState:
pass
request.state = MockState()
result = await route_handler(request)
assert isinstance(result, PaginatedResponse)

View file

@@ -9,7 +9,7 @@ from tempfile import TemporaryDirectory
import pytest
from llama_stack.providers.utils.sqlstore.api import ColumnType
from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType
from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
@@ -65,6 +65,38 @@ async def test_sqlite_sqlstore():
assert result.has_more is False
async def test_sqlstore_upsert_support():
with TemporaryDirectory() as tmp_dir:
db_path = tmp_dir + "/upsert.db"
store = SqlAlchemySqlStoreImpl(SqliteSqlStoreConfig(db_path=db_path))
await store.create_table(
"items",
{
"id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
"value": ColumnType.STRING,
"updated_at": ColumnType.INTEGER,
},
)
await store.upsert(
table="items",
data={"id": "item_1", "value": "first", "updated_at": 1},
conflict_columns=["id"],
)
row = await store.fetch_one("items", {"id": "item_1"})
assert row == {"id": "item_1", "value": "first", "updated_at": 1}
await store.upsert(
table="items",
data={"id": "item_1", "value": "second", "updated_at": 2},
conflict_columns=["id"],
update_columns=["value", "updated_at"],
)
row = await store.fetch_one("items", {"id": "item_1"})
assert row == {"id": "item_1", "value": "second", "updated_at": 2}
async def test_sqlstore_pagination_basic():
"""Test basic pagination functionality at the SQL store level."""
with TemporaryDirectory() as tmp_dir:

12
uv.lock generated
View file

@@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = ">=3.12"
resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -2166,7 +2166,7 @@ test = [
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" },
{ name = "pypdf", specifier = ">=6.1.3" },
{ name = "qdrant-client" },
{ name = "requests" },
{ name = "sqlalchemy" },
@@ -2219,7 +2219,7 @@ unit = [
{ name = "moto", extras = ["s3"], specifier = ">=5.1.10" },
{ name = "ollama" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pypdf" },
{ name = "pypdf", specifier = ">=6.1.3" },
{ name = "sqlalchemy" },
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
{ name = "sqlite-vec" },
@@ -3973,11 +3973,11 @@ wheels = [
[[package]]
name = "pypdf"
version = "5.9.0"
version = "6.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/89/3a/584b97a228950ed85aec97c811c68473d9b8d149e6a8c155668287cf1a28/pypdf-5.9.0.tar.gz", hash = "sha256:30f67a614d558e495e1fbb157ba58c1de91ffc1718f5e0dfeb82a029233890a1", size = 5035118, upload-time = "2025-07-27T14:04:52.364Z" }
sdist = { url = "https://files.pythonhosted.org/packages/4e/2b/8795ec0378384000b0a37a2b5e6d67fa3d84802945aa2c612a78a784d7d4/pypdf-6.2.0.tar.gz", hash = "sha256:46b4d8495d68ae9c818e7964853cd9984e6a04c19fe7112760195395992dce48", size = 5272001, upload-time = "2025-11-09T11:10:41.911Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/48/d9/6cff57c80a6963e7dd183bf09e9f21604a77716644b1e580e97b259f7612/pypdf-5.9.0-py3-none-any.whl", hash = "sha256:be10a4c54202f46d9daceaa8788be07aa8cd5ea8c25c529c50dd509206382c35", size = 313193, upload-time = "2025-07-27T14:04:50.53Z" },
{ url = "https://files.pythonhosted.org/packages/de/ba/743ddcaf1a8fb439342399645921e2cf2c600464cba5531a11f1cc0822b6/pypdf-6.2.0-py3-none-any.whl", hash = "sha256:4c0f3e62677217a777ab79abe22bf1285442d70efabf552f61c7a03b6f5c569f", size = 326592, upload-time = "2025-11-09T11:10:39.941Z" },
]
[[package]]