Implement attaching files to vector stores

This adds the ability to attach files to vector
stores (client.vector_stores.files.create) for the OpenAI Vector
Stores Files API.

The initial implementation supports only the Faiss provider and is tested via the
existing test_responses.py::test_response_non_streaming_file_search.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning 2025-06-11 11:14:38 -04:00
parent 8ede67b809
commit de84ee0748
12 changed files with 689 additions and 28 deletions

View file

@ -3240,6 +3240,59 @@
}
}
},
"/v1/openai/v1/vector_stores/{vector_store_id}/files": {
  "post": {
    "tags": ["VectorIO"],
    "description": "Attach a file to a vector store.",
    "parameters": [
      {
        "name": "vector_store_id",
        "in": "path",
        "required": true,
        "description": "The ID of the vector store to attach the file to.",
        "schema": { "type": "string" }
      }
    ],
    "requestBody": {
      "required": true,
      "content": {
        "application/json": {
          "schema": { "$ref": "#/components/schemas/OpenaiAttachFileToVectorStoreRequest" }
        }
      }
    },
    "responses": {
      "200": {
        "description": "A VectorStoreFileObject representing the attached file.",
        "content": {
          "application/json": {
            "schema": { "$ref": "#/components/schemas/VectorStoreFileObject" }
          }
        }
      },
      "400": { "$ref": "#/components/responses/BadRequest400" },
      "429": { "$ref": "#/components/responses/TooManyRequests429" },
      "500": { "$ref": "#/components/responses/InternalServerError500" },
      "default": { "$ref": "#/components/responses/DefaultError" }
    }
  }
},
"/v1/openai/v1/completions": {
"post": {
"responses": {
@ -11831,6 +11884,232 @@
],
"title": "LogEventRequest"
},
"VectorStoreChunkingStrategy": {
  "discriminator": {
    "propertyName": "type",
    "mapping": {
      "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto",
      "static": "#/components/schemas/VectorStoreChunkingStrategyStatic"
    }
  },
  "oneOf": [
    { "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" },
    { "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" }
  ]
},
"VectorStoreChunkingStrategyAuto": {
  "title": "VectorStoreChunkingStrategyAuto",
  "type": "object",
  "required": ["type"],
  "additionalProperties": false,
  "properties": {
    "type": { "type": "string", "const": "auto", "default": "auto" }
  }
},
"VectorStoreChunkingStrategyStatic": {
  "title": "VectorStoreChunkingStrategyStatic",
  "type": "object",
  "required": ["type", "static"],
  "additionalProperties": false,
  "properties": {
    "type": { "type": "string", "const": "static", "default": "static" },
    "static": { "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig" }
  }
},
"VectorStoreChunkingStrategyStaticConfig": {
  "type": "object",
  "properties": {
    "chunk_overlap_tokens": {
      "type": "integer",
      "default": 400
    },
    "max_chunk_size_tokens": {
      "type": "integer",
      "minimum": 100,
      "maximum": 4096,
      "default": 800
    }
  },
  "additionalProperties": false,
  "required": [
    "chunk_overlap_tokens",
    "max_chunk_size_tokens"
  ],
  "title": "VectorStoreChunkingStrategyStaticConfig"
},
"OpenaiAttachFileToVectorStoreRequest": {
  "title": "OpenaiAttachFileToVectorStoreRequest",
  "type": "object",
  "required": ["file_id"],
  "additionalProperties": false,
  "properties": {
    "file_id": {
      "type": "string",
      "description": "The ID of the file to attach to the vector store."
    },
    "attributes": {
      "type": "object",
      "description": "The key-value attributes stored with the file, which can be used for filtering.",
      "additionalProperties": {
        "oneOf": [
          { "type": "null" },
          { "type": "boolean" },
          { "type": "number" },
          { "type": "string" },
          { "type": "array" },
          { "type": "object" }
        ]
      }
    },
    "chunking_strategy": {
      "$ref": "#/components/schemas/VectorStoreChunkingStrategy",
      "description": "The chunking strategy to use for the file."
    }
  }
},
"VectorStoreFileLastError": {
  "title": "VectorStoreFileLastError",
  "type": "object",
  "required": ["code", "message"],
  "additionalProperties": false,
  "properties": {
    "code": {
      "oneOf": [
        { "type": "string", "const": "server_error" },
        { "type": "string", "const": "rate_limit_exceeded" }
      ]
    },
    "message": { "type": "string" }
  }
},
"VectorStoreFileObject": {
  "title": "VectorStoreFileObject",
  "description": "OpenAI Vector Store File object.",
  "type": "object",
  "required": [
    "id",
    "object",
    "attributes",
    "chunking_strategy",
    "created_at",
    "status",
    "usage_bytes",
    "vector_store_id"
  ],
  "additionalProperties": false,
  "properties": {
    "id": { "type": "string" },
    "object": { "type": "string", "default": "vector_store.file" },
    "attributes": {
      "type": "object",
      "additionalProperties": {
        "oneOf": [
          { "type": "null" },
          { "type": "boolean" },
          { "type": "number" },
          { "type": "string" },
          { "type": "array" },
          { "type": "object" }
        ]
      }
    },
    "chunking_strategy": { "$ref": "#/components/schemas/VectorStoreChunkingStrategy" },
    "created_at": { "type": "integer" },
    "last_error": { "$ref": "#/components/schemas/VectorStoreFileLastError" },
    "status": {
      "oneOf": [
        { "type": "string", "const": "completed" },
        { "type": "string", "const": "in_progress" },
        { "type": "string", "const": "cancelled" },
        { "type": "string", "const": "failed" }
      ]
    },
    "usage_bytes": { "type": "integer", "default": 0 },
    "vector_store_id": { "type": "string" }
  }
},
"OpenAIJSONSchema": {
"type": "object",
"properties": {

View file

@ -2263,6 +2263,43 @@ paths:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
/v1/openai/v1/vector_stores/{vector_store_id}/files:
  # POST attaches an existing file to the vector store named in the path.
  post:
    tags: [VectorIO]
    description: Attach a file to a vector store.
    parameters:
      - name: vector_store_id
        in: path
        required: true
        description: 'The ID of the vector store to attach the file to.'
        schema:
          type: string
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/OpenaiAttachFileToVectorStoreRequest'
    responses:
      '200':
        description: 'A VectorStoreFileObject representing the attached file.'
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/VectorStoreFileObject'
      '400':
        $ref: '#/components/responses/BadRequest400'
      '429':
        $ref: '#/components/responses/TooManyRequests429'
      '500':
        $ref: '#/components/responses/InternalServerError500'
      default:
        $ref: '#/components/responses/DefaultError'
/v1/openai/v1/completions:
post:
responses:
@ -8289,6 +8326,148 @@ components:
- event
- ttl_seconds
title: LogEventRequest
# Tagged union: the "type" property selects which strategy schema applies.
VectorStoreChunkingStrategy:
  discriminator:
    propertyName: type
    mapping:
      auto: '#/components/schemas/VectorStoreChunkingStrategyAuto'
      static: '#/components/schemas/VectorStoreChunkingStrategyStatic'
  oneOf:
    - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
    - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic'
# Strategy variant identified by type == "auto"; it declares no other properties.
VectorStoreChunkingStrategyAuto:
  title: VectorStoreChunkingStrategyAuto
  type: object
  required: [type]
  additionalProperties: false
  properties:
    type:
      type: string
      const: auto
      default: auto
# Strategy variant identified by type == "static"; its settings live under "static".
VectorStoreChunkingStrategyStatic:
  title: VectorStoreChunkingStrategyStatic
  type: object
  required: [type, static]
  additionalProperties: false
  properties:
    type:
      type: string
      const: static
      default: static
    static:
      $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig'
VectorStoreChunkingStrategyStaticConfig:
  type: object
  properties:
    chunk_overlap_tokens:
      type: integer
      default: 400
    max_chunk_size_tokens:
      type: integer
      # Bounds mirror the server-side model, which declares ge=100 / le=4096 for this field.
      minimum: 100
      maximum: 4096
      default: 800
  additionalProperties: false
  required:
    - chunk_overlap_tokens
    - max_chunk_size_tokens
  title: VectorStoreChunkingStrategyStaticConfig
# Request body for attaching a file to a vector store; only file_id is required.
OpenaiAttachFileToVectorStoreRequest:
  title: OpenaiAttachFileToVectorStoreRequest
  type: object
  required: [file_id]
  additionalProperties: false
  properties:
    file_id:
      type: string
      description: 'The ID of the file to attach to the vector store.'
    attributes:
      type: object
      description: 'The key-value attributes stored with the file, which can be used for filtering.'
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
    chunking_strategy:
      $ref: '#/components/schemas/VectorStoreChunkingStrategy'
      description: 'The chunking strategy to use for the file.'
# Error detail: one of two fixed codes plus a free-form message.
VectorStoreFileLastError:
  title: VectorStoreFileLastError
  type: object
  required: [code, message]
  additionalProperties: false
  properties:
    code:
      oneOf:
        - type: string
          const: server_error
        - type: string
          const: rate_limit_exceeded
    message:
      type: string
VectorStoreFileObject:
  title: VectorStoreFileObject
  description: OpenAI Vector Store File object.
  type: object
  # last_error is the only property that is not required.
  required:
    - id
    - object
    - attributes
    - chunking_strategy
    - created_at
    - status
    - usage_bytes
    - vector_store_id
  additionalProperties: false
  properties:
    id:
      type: string
    object:
      type: string
      default: vector_store.file
    attributes:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
    chunking_strategy:
      $ref: '#/components/schemas/VectorStoreChunkingStrategy'
    created_at:
      type: integer
    last_error:
      $ref: '#/components/schemas/VectorStoreFileLastError'
    status:
      oneOf:
        - type: string
          const: completed
        - type: string
          const: in_progress
        - type: string
          const: cancelled
        - type: string
          const: failed
    usage_bytes:
      type: integer
      default: 0
    vector_store_id:
      type: string
OpenAIJSONSchema:
type: object
properties:

View file

@ -8,7 +8,7 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Any, Literal, Protocol, runtime_checkable
from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
@ -16,6 +16,7 @@ from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
from llama_stack.strong_typing.schema import register_schema
class Chunk(BaseModel):
@ -133,6 +134,50 @@ class VectorStoreDeleteResponse(BaseModel):
deleted: bool = True
@json_schema_type
class VectorStoreChunkingStrategyAuto(BaseModel):
    """Chunking strategy variant tagged ``type="auto"``; it declares no other fields."""

    # Fixed tag value; also the default, so the model can be built with no arguments.
    type: Literal["auto"] = "auto"
@json_schema_type
class VectorStoreChunkingStrategyStaticConfig(BaseModel):
    """Token-count settings carried by the "static" chunking strategy."""

    # Defaults to 400; no bounds are declared for this field.
    # NOTE(review): nothing here relates the overlap to max_chunk_size_tokens — confirm
    # whether an overlap greater than or equal to the chunk size should be rejected.
    chunk_overlap_tokens: int = 400
    # Defaults to 800; validated to lie within 100..4096 inclusive (ge/le).
    max_chunk_size_tokens: int = Field(800, ge=100, le=4096)
@json_schema_type
class VectorStoreChunkingStrategyStatic(BaseModel):
    """Chunking strategy variant tagged ``type="static"`` with explicit chunking settings."""

    # Fixed tag value for this variant.
    type: Literal["static"] = "static"
    # Required settings object; it has no default, so callers must supply it.
    static: VectorStoreChunkingStrategyStaticConfig
# Union of the two chunking strategy models, discriminated on their "type" field.
VectorStoreChunkingStrategy = Annotated[
    VectorStoreChunkingStrategyAuto | VectorStoreChunkingStrategyStatic, Field(discriminator="type")
]
# Registers the union under the name "VectorStoreChunkingStrategy".
# NOTE(review): presumably this is what lets the generated API spec reference the union
# by name — confirm against register_schema.
register_schema(VectorStoreChunkingStrategy, name="VectorStoreChunkingStrategy")
@json_schema_type
class VectorStoreFileLastError(BaseModel):
    """Error detail attached to a vector store file: a fixed error code plus a message."""

    # Restricted to exactly these two literal values.
    code: Literal["server_error"] | Literal["rate_limit_exceeded"]
    # Free-form text describing the error.
    message: str
@json_schema_type
class VectorStoreFileObject(BaseModel):
    """OpenAI Vector Store File object."""

    id: str
    # Object-type tag; defaults to "vector_store.file".
    object: str = "vector_store.file"
    # Arbitrary key/value data; every instance gets its own empty dict by default.
    attributes: dict[str, Any] = Field(default_factory=dict)
    # Required: either the "auto" or the "static" strategy model.
    chunking_strategy: VectorStoreChunkingStrategy
    # Integer timestamp.
    # NOTE(review): presumably Unix epoch seconds — confirm against producers.
    created_at: int
    # Optional error detail; None unless a producer sets it.
    last_error: VectorStoreFileLastError | None = None
    # One of the four literal status values below.
    status: Literal["completed"] | Literal["in_progress"] | Literal["cancelled"] | Literal["failed"]
    usage_bytes: int = 0
    vector_store_id: str
class VectorDBStore(Protocol):
def get_vector_db(self, vector_db_id: str) -> VectorDB | None: ...
@ -290,3 +335,21 @@ class VectorIO(Protocol):
:returns: A VectorStoreSearchResponse containing the search results.
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Attach a file to a vector store.

    :param vector_store_id: The ID of the vector store to attach the file to.
    :param file_id: The ID of the file to attach to the vector store.
    :param attributes: The key-value attributes stored with the file, which can be used for filtering.
    :param chunking_strategy: The chunking strategy to use for the file.
    :returns: A VectorStoreFileObject representing the attached file.
    """
    # Stub only: the body is a bare ellipsis, so the behavior comes from implementing classes.
    ...

View file

@ -19,6 +19,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import RoutingTable
@ -254,3 +255,20 @@ class VectorIORouter(VectorIO):
ranking_options=ranking_options,
rewrite_query=rewrite_query,
)
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Forward an attach-file request to the provider registered for the vector store.

    :param vector_store_id: ID of the target vector store; also used as the routing key.
    :param file_id: ID of the file to attach.
    :param attributes: Optional key-value attributes, passed through unchanged.
    :param chunking_strategy: Optional chunking strategy, passed through unchanged.
    :returns: Whatever VectorStoreFileObject the selected provider returns.
    """
    logger.debug(f"VectorIORouter.openai_attach_file_to_vector_store: {vector_store_id}, {file_id}")
    # The vector store ID selects the provider implementation.
    backend = self.routing_table.get_provider_impl(vector_store_id)
    attached_file = await backend.openai_attach_file_to_vector_store(
        vector_store_id=vector_store_id,
        file_id=file_id,
        attributes=attributes,
        chunking_strategy=chunking_strategy,
    )
    return attached_file

View file

@ -16,6 +16,6 @@ async def get_provider_impl(config: FaissVectorIOConfig, deps: dict[Api, Any]):
assert isinstance(config, FaissVectorIOConfig), f"Unexpected config type: {type(config)}"
impl = FaissVectorIOAdapter(config, deps[Api.inference])
impl = FaissVectorIOAdapter(config, deps[Api.inference], deps[Api.files])
await impl.initialize()
return impl

View file

@ -9,20 +9,30 @@ import base64
import io
import json
import logging
import time
from typing import Any
import faiss
import numpy as np
from numpy.typing import NDArray
from llama_stack.apis.files import Files
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.inference.inference import Inference
from llama_stack.apis.tools.rag_tool import RAGDocument
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.vector_io import (
Chunk,
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.vector_io.vector_io import (
VectorStoreChunkingStrategy,
VectorStoreChunkingStrategyAuto,
VectorStoreChunkingStrategyStatic,
VectorStoreFileLastError,
VectorStoreFileObject,
)
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.kvstore import kvstore_impl
from llama_stack.providers.utils.kvstore.api import KVStore
@ -30,6 +40,8 @@ from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIV
from llama_stack.providers.utils.memory.vector_store import (
EmbeddingIndex,
VectorDBWithIndex,
content_from_doc,
make_overlapped_chunks,
)
from .config import FaissVectorIOConfig
@ -132,9 +144,10 @@ class FaissIndex(EmbeddingIndex):
class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate):
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference) -> None:
def __init__(self, config: FaissVectorIOConfig, inference_api: Inference, files_api: Files) -> None:
self.config = config
self.inference_api = inference_api
self.files_api = files_api
self.cache: dict[str, VectorDBWithIndex] = {}
self.kvstore: KVStore | None = None
self.openai_vector_stores: dict[str, dict[str, Any]] = {}
@ -250,3 +263,71 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
assert self.kvstore is not None
key = f"{OPENAI_VECTOR_STORES_PREFIX}{store_id}"
await self.kvstore.delete(key)
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Chunk a stored file's content and insert the chunks into a vector store.

    The file content is fetched through the Files API, converted with
    ``content_from_doc``, split with ``make_overlapped_chunks`` and inserted with
    ``insert_chunks``. Failures are not raised to the caller: they are logged and
    reported through ``status="failed"`` and ``last_error`` on the returned object.

    :param vector_store_id: ID of the vector store that receives the chunks.
    :param file_id: ID of the file to read; also used as the document ID and the
        returned object's ``id``.
    :param attributes: Optional key-value attributes; used as the document metadata.
        ``None`` is treated as an empty dict.
    :param chunking_strategy: Optional chunking strategy; ``None`` is treated as "auto".
    :returns: A VectorStoreFileObject whose status is "completed" or "failed".
    """
    attributes = attributes or {}
    chunking_strategy = chunking_strategy or VectorStoreChunkingStrategyAuto()

    vector_store_file_object = VectorStoreFileObject(
        id=file_id,
        attributes=attributes,
        chunking_strategy=chunking_strategy,
        created_at=int(time.time()),
        status="in_progress",
        vector_store_id=vector_store_id,
    )

    if isinstance(chunking_strategy, VectorStoreChunkingStrategyStatic):
        max_chunk_size_tokens = chunking_strategy.static.max_chunk_size_tokens
        chunk_overlap_tokens = chunking_strategy.static.chunk_overlap_tokens
    else:
        # Default values from OpenAI API docs
        max_chunk_size_tokens = 800
        chunk_overlap_tokens = 400

    try:
        content_response = await self.files_api.openai_retrieve_file_content(file_id)
        content = content_response.body
        doc = RAGDocument(
            document_id=file_id,
            content=content,
            metadata=attributes,
        )
        content = await content_from_doc(doc)

        chunks = make_overlapped_chunks(
            doc.document_id,
            content,
            max_chunk_size_tokens,
            chunk_overlap_tokens,
            doc.metadata,
        )

        if not chunks:
            vector_store_file_object.status = "failed"
            vector_store_file_object.last_error = VectorStoreFileLastError(
                code="server_error",
                message="No chunks were generated from the file",
            )
            return vector_store_file_object

        await self.insert_chunks(
            vector_db_id=vector_store_id,
            chunks=chunks,
        )
    except Exception as e:
        # The error is converted into a "failed" status instead of propagating, so log
        # the traceback here; otherwise it would be lost on the server side.
        logging.getLogger(__name__).exception(
            "Failed to attach file %s to vector store %s", file_id, vector_store_id
        )
        vector_store_file_object.status = "failed"
        vector_store_file_object.last_error = VectorStoreFileLastError(
            code="server_error",
            message=str(e),
        )
        return vector_store_file_object

    vector_store_file_object.status = "completed"
    return vector_store_file_object

View file

@ -24,6 +24,7 @@ from llama_stack.apis.vector_io import (
QueryChunksResponse,
VectorIO,
)
from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import VectorDBsProtocolPrivate
from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin
from llama_stack.providers.utils.memory.vector_store import EmbeddingIndex, VectorDBWithIndex
@ -489,6 +490,15 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtoc
raise ValueError(f"Vector DB {vector_db_id} not found")
return await self.cache[vector_db_id].query_chunks(query, params)
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Reject the request: this adapter does not implement attaching files.

    :raises NotImplementedError: always; no argument is inspected.
    """
    unsupported = "OpenAI Vector Stores Files API is not supported in sqlite_vec"
    raise NotImplementedError(unsupported)
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text."""

View file

@ -31,7 +31,7 @@ def available_providers() -> list[ProviderSpec]:
pip_packages=["faiss-cpu"],
module="llama_stack.providers.inline.vector_io.faiss",
config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig",
api_dependencies=[Api.inference],
api_dependencies=[Api.inference, Api.files],
),
# NOTE: sqlite-vec cannot be bundled into the container image because it does not have a
# source distribution and the wheels are not available for all platforms.

View file

@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@ -241,3 +242,12 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
rewrite_query: bool | None = False,
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Reject the request: this adapter does not implement attaching files.

    :raises NotImplementedError: always; no argument is inspected.
    """
    unsupported = "OpenAI Vector Stores API is not supported in Chroma"
    raise NotImplementedError(unsupported)

View file

@ -25,6 +25,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@ -240,6 +241,15 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Reject the request: this adapter does not implement attaching files.

    :raises NotImplementedError: always; no argument is inspected.
    """
    unsupported = "OpenAI Vector Stores API is not supported in Milvus"
    raise NotImplementedError(unsupported)
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
"""Generate a unique chunk ID using a hash of document ID and chunk text."""

View file

@ -23,6 +23,7 @@ from llama_stack.apis.vector_io import (
VectorStoreObject,
VectorStoreSearchResponsePage,
)
from llama_stack.apis.vector_io.vector_io import VectorStoreChunkingStrategy, VectorStoreFileObject
from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
from llama_stack.providers.utils.memory.vector_store import (
@ -241,3 +242,12 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
rewrite_query: bool | None = False,
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
async def openai_attach_file_to_vector_store(
    self,
    vector_store_id: str,
    file_id: str,
    attributes: dict[str, Any] | None = None,
    chunking_strategy: VectorStoreChunkingStrategy | None = None,
) -> VectorStoreFileObject:
    """Reject the request: this adapter does not implement attaching files.

    :raises NotImplementedError: always; no argument is inspected.
    """
    unsupported = "OpenAI Vector Stores API is not supported in Qdrant"
    raise NotImplementedError(unsupported)

View file

@ -10,7 +10,6 @@ import time
import httpx
import openai
import pytest
from llama_stack_client import LlamaStackClient
from llama_stack import LlamaStackAsLibraryClient
from llama_stack.distribution.datatypes import AuthenticationRequiredError
@ -275,10 +274,13 @@ def test_response_non_streaming_file_search(
if should_skip_test(verification_config, provider, model, test_name_base):
pytest.skip(f"Skipping {test_name_base} for model {model} on provider {provider} based on config.")
# Ensure we don't reuse an existing vector store
vector_stores = openai_client.vector_stores.list()
for vector_store in vector_stores:
if vector_store.name == "test_vector_store":
openai_client.vector_stores.delete(vector_store_id=vector_store.id)
# Create a new vector store
vector_store = openai_client.vector_stores.create(
name="test_vector_store",
# extra_body={
@ -287,47 +289,42 @@ def test_response_non_streaming_file_search(
# },
)
doc_content = "Llama 4 Maverick has 128 experts"
chunks = [
{
"content": doc_content,
"mime_type": "text/plain",
"metadata": {
"document_id": "doc1",
},
},
]
# Ensure we don't reuse an existing file
file_name = "test_response_non_streaming_file_search.txt"
files = openai_client.files.list()
for file in files:
if file.filename == file_name:
openai_client.files.delete(file_id=file.id)
# Upload a text file with our document content
doc_content = "Llama 4 Maverick has 128 experts"
file_path = tmp_path / file_name
file_path.write_text(doc_content)
file_response = openai_client.files.create(file=open(file_path, "rb"), purpose="assistants")
if "api.openai.com" in base_url:
# Attach our file to the vector store
file_attach_response = openai_client.vector_stores.files.create(
vector_store_id=vector_store.id,
file_id=file_response.id,
)
# Wait for the file to be attached
while file_attach_response.status == "in_progress":
time.sleep(0.1)
file_attach_response = openai_client.vector_stores.files.retrieve(
vector_store_id=vector_store.id,
file_id=file_response.id,
)
else:
# TODO: only until we have a way to insert content into OpenAI vector stores
lls_client = LlamaStackClient(base_url=base_url.replace("/v1/openai/v1", ""))
lls_client.vector_io.insert(vector_db_id=vector_store.id, chunks=chunks)
assert file_attach_response.status == "completed"
assert not file_attach_response.last_error
# Update our tools with the right vector store id
tools = case["tools"]
for tool in tools:
if tool["type"] == "file_search":
tool["vector_store_ids"] = [vector_store.id]
# Create the response request, which should query our document
response = openai_client.responses.create(
model=model,
input=case["input"],
@ -335,6 +332,8 @@ def test_response_non_streaming_file_search(
stream=False,
include=["file_search_call.results"],
)
# Verify the file_search_tool was called
assert len(response.output) > 1
assert response.output[0].type == "file_search_call"
assert response.output[0].status == "completed"
@ -342,6 +341,8 @@ def test_response_non_streaming_file_search(
assert response.output[0].results
assert response.output[0].results[0].text == doc_content
assert response.output[0].results[0].score > 0
# Verify the assistant response that summarizes the results
assert response.output[1].type == "message"
assert response.output[1].status == "completed"
assert response.output[1].role == "assistant"