mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)

feat: Add search_mode support to OpenAI vector store API (#2500)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.13, inference) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 11s
Python Package Build Test / build (3.12) (push) Failing after 3s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 15s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (http, 3.13, scoring) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (http, 3.13, post_training) (push) Failing after 17s
Python Package Build Test / build (3.13) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.13, providers) (push) Failing after 18s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.13, tool_runtime) (push) Failing after 17s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 17s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.13, datasets) (push) Failing after 19s
Test Llama Stack Build / build (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 44s
Test External Providers / test-external-providers (venv) (push) Failing after 47s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 50s
Pre-commit / pre-commit (push) Successful in 2m12s
# What does this PR do?

Adds a `search_mode` parameter (vector/keyword/hybrid) to the `openai_search_vector_store` method. Uses `str` instead of a `Literal` type so that OpenAPI code generation works correctly.

Closes: #2459

## Test Plan

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
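For illustration, a hedged sketch of how a caller might use the new parameter once the regenerated client exposes it. The server URL and store ID are placeholders, and `vector_stores.search` accepting `search_mode` is exactly the client-side change this PR's test plan says is still pending:

```python
# Sketch only: assumes a regenerated llama-stack-client whose
# vector_stores.search() accepts the new search_mode argument.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server URL

page = client.vector_stores.search(
    vector_store_id="vs_123",             # hypothetical vector store ID
    query="how do I rotate credentials?",
    max_num_results=5,
    search_mode="keyword",                # new: "keyword", "vector" (default), or "hybrid"
)
for result in page.data:
    print(result)
```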
parent 114946ae88
commit cfee63bd0d
9 changed files with 42 additions and 4 deletions
docs/_static/llama-stack-spec.html (vendored, 4 changes)

@@ -13943,6 +13943,10 @@
             "rewrite_query": {
               "type": "boolean",
               "description": "Whether to rewrite the natural language query for vector search (default false)"
             },
+            "search_mode": {
+              "type": "string",
+              "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
+            }
           },
           "additionalProperties": false,
docs/_static/llama-stack-spec.yaml (vendored, 4 changes)

@@ -9737,6 +9737,10 @@ components:
         description: >-
           Whether to rewrite the natural language query for vector search (default
           false)
+      search_mode:
+        type: string
+        description: >-
+          The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
       additionalProperties: false
       required:
         - query
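On the wire, the schema addition corresponds to a request body like the sketch below. This is hedged: only the body fields come from the spec excerpts above; the route is an assumption based on the OpenAI-compatible vector store search surface, not something shown in this diff.

```python
# Raw-HTTP sketch of a vector store search request carrying the new field.
import httpx

BASE_URL = "http://localhost:8321"   # hypothetical llama-stack server
VECTOR_STORE_ID = "vs_123"           # hypothetical store ID

body = {
    "query": "rotate credentials",       # required per the schema
    "max_num_results": 10,
    "rewrite_query": False,
    "search_mode": "hybrid",             # "keyword", "vector", or "hybrid" (default "vector")
}

# The path below is assumed, not taken from the spec excerpt above.
resp = httpx.post(
    f"{BASE_URL}/v1/openai/v1/vector_stores/{VECTOR_STORE_ID}/search",
    json=body,
)
resp.raise_for_status()
print(resp.json())
```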
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store.

@@ -383,6 +384,7 @@ class VectorIO(Protocol):
         :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
         :param ranking_options: Ranking options for fine-tuning the search results.
         :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
         :returns: A VectorStoreSearchResponse containing the search results.
         """
         ...
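To show what the widened protocol signature enables, here is a hypothetical provider-side dispatch on `search_mode`. It is a sketch only; the `_keyword_search`/`_hybrid_search`/`_vector_search` helpers are illustrative names and not part of this PR.

```python
# Hypothetical adapter showing how a provider might branch on search_mode.
class ExampleVectorIOAdapter:
    async def openai_search_vector_store(
        self,
        vector_store_id: str,
        query: str | list[str],
        max_num_results: int | None = 10,
        ranking_options=None,
        rewrite_query: bool | None = False,
        search_mode: str | None = "vector",
    ):
        if search_mode == "keyword":
            return await self._keyword_search(vector_store_id, query, max_num_results)
        if search_mode == "hybrid":
            return await self._hybrid_search(vector_store_id, query, max_num_results, ranking_options)
        # "vector" is the default mode
        return await self._vector_search(vector_store_id, query, max_num_results)
```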
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
         # Route based on vector store ID

@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
             max_num_results=max_num_results,
             ranking_options=ranking_options,
             rewrite_query=rewrite_query,
+            search_mode=search_mode,
         )

     async def openai_attach_file_to_vector_store(
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

     async def openai_attach_file_to_vector_store(
         self,
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
         max_num_results = max_num_results or 10

+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")
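The validation added above surfaces to callers as a `ValueError`. A hedged sketch of exercising it directly against a provider built on `OpenAIVectorStoreMixin`; the `provider` and `store_id` objects are placeholders supplied by the caller, not anything defined in this PR.

```python
# Sketch: an unknown mode is rejected before any search work happens.
import pytest

async def check_invalid_search_mode(provider, store_id: str):
    # "semantic" is not in {"keyword", "vector", "hybrid"}, so the mixin raises ValueError.
    with pytest.raises(ValueError, match="search_mode must be one of"):
        await provider.openai_search_vector_store(
            vector_store_id=store_id,
            query="anything",
            search_mode="semantic",
        )
```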
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
         file_id=file.id,
     )
     assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+    """Test OpenAI vector store search with different search modes.
+
+    This test is skipped because the client library
+    needs to be regenerated from the updated OpenAPI spec to support the
+    search_mode parameter. Once the client library is updated, this test
+    can be enabled to verify:
+    - vector search mode (default)
+    - keyword search mode
+    - hybrid search mode
+    - invalid search mode validation
+    """
+    # TODO: Enable this test once llama_stack_client is updated to support search_mode
+    # The server-side implementation is complete but the client
+    # library needs to be updated:
+    # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+    pass
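Once llama_stack_client is regenerated, the skipped test could be fleshed out roughly as below. This is a sketch only: it reuses the `compat_client_with_empty_stores` fixture from the surrounding file, and it assumes the regenerated client's `vector_stores.create` and `vector_stores.search` helpers accept the new argument.

```python
# Sketch of the eventual test body; assumes the regenerated client exposes search_mode.
import pytest

@pytest.mark.parametrize("mode", ["vector", "keyword", "hybrid"])
def test_openai_vector_store_search_modes_sketch(compat_client_with_empty_stores, mode):
    client = compat_client_with_empty_stores
    vector_store = client.vector_stores.create(name=f"search_mode_{mode}")  # assumed helper

    response = client.vector_stores.search(
        vector_store_id=vector_store.id,
        query="test query",
        search_mode=mode,  # assumes the regenerated client exposes this parameter
    )
    assert response is not None

    # An unsupported mode should surface the server-side ValueError as an API error.
    with pytest.raises(Exception):
        client.vector_stores.search(
            vector_store_id=vector_store.id,
            query="test query",
            search_mode="not_a_mode",
        )
```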