Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
feat: Add search_mode support to OpenAI vector store API (#2500)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.13, inference) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 11s
Python Package Build Test / build (3.12) (push) Failing after 3s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 15s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (http, 3.13, scoring) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (http, 3.13, post_training) (push) Failing after 17s
Python Package Build Test / build (3.13) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.13, providers) (push) Failing after 18s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.13, tool_runtime) (push) Failing after 17s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 17s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.13, datasets) (push) Failing after 19s
Test Llama Stack Build / build (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 44s
Test External Providers / test-external-providers (venv) (push) Failing after 47s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 50s
Pre-commit / pre-commit (push) Successful in 2m12s
# What does this PR do?

Adds a `search_mode` parameter (`"vector"`, `"keyword"`, or `"hybrid"`) to the `openai_search_vector_store` method. The parameter is typed as `str` rather than `Literal` to keep OpenAPI code generation working.

Closes: #2459

## Test Plan

A new test, `test_openai_vector_store_search_modes`, is added to the OpenAI vector store integration suite but is skipped until the client library is regenerated to accept `search_mode` (see the test diff below).

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
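For orientation, here is a minimal sketch of how the extended server-side method is expected to be called. The `vector_io` handle, the `vs_123` store id, and the `.data` attribute on the returned page are illustrative assumptions; only the method and parameter names come from this change.

```python
async def demo(vector_io) -> None:
    # `vector_io` is assumed to be any VectorIO implementation that exposes the
    # OpenAI-compatible vector store API (e.g. the VectorIORouter); "vs_123" is
    # a placeholder store id.
    page = await vector_io.openai_search_vector_store(
        vector_store_id="vs_123",
        query="What is the capital of France?",
        max_num_results=5,
        rewrite_query=False,
        search_mode="hybrid",  # new in this PR: "keyword", "vector", or "hybrid" (default "vector")
    )
    # Assumes the response page follows the OpenAI shape with a `data` list.
    for result in page.data:
        print(result)
```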
parent 114946ae88
commit cfee63bd0d

9 changed files with 42 additions and 4 deletions
docs/_static/llama-stack-spec.html (vendored): 4 changed lines

@@ -13943,6 +13943,10 @@
                     "rewrite_query": {
                         "type": "boolean",
                         "description": "Whether to rewrite the natural language query for vector search (default false)"
+                    },
+                    "search_mode": {
+                        "type": "string",
+                        "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
                     }
                 },
                 "additionalProperties": false,
docs/_static/llama-stack-spec.yaml (vendored): 4 changed lines

@@ -9737,6 +9737,10 @@ components:
           description: >-
             Whether to rewrite the natural language query for vector search (default
             false)
+        search_mode:
+          type: string
+          description: >-
+            The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
       additionalProperties: false
       required:
         - query
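To make the spec change concrete, the snippet below sketches a raw search request that exercises the new field. The endpoint path and port are assumptions about a locally running llama-stack server, not something defined in this diff; only the body fields follow the schema above.

```python
import requests

# Assumed local server and OpenAI-compatible route; adjust to your deployment.
BASE_URL = "http://localhost:8321"
VECTOR_STORE_ID = "vs_123"  # placeholder id

payload = {
    "query": "What is the capital of France?",
    "max_num_results": 5,
    "rewrite_query": False,
    "search_mode": "hybrid",  # new field: "keyword", "vector", or "hybrid" (default "vector")
}

resp = requests.post(
    f"{BASE_URL}/v1/openai/v1/vector_stores/{VECTOR_STORE_ID}/search",
    json=payload,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```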
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store.

@@ -383,6 +384,7 @@ class VectorIO(Protocol):
         :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
         :param ranking_options: Ranking options for fine-tuning the search results.
         :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
         :returns: A VectorStoreSearchResponse containing the search results.
         """
         ...
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
         # Route based on vector store ID

@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
             max_num_results=max_num_results,
             ranking_options=ranking_options,
             rewrite_query=rewrite_query,
+            search_mode=search_mode,
         )

     async def openai_attach_file_to_vector_store(
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

     async def openai_attach_file_to_vector_store(
         self,
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
         max_num_results = max_num_results or 10

+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")
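The validation added to `OpenAIVectorStoreMixin` above can be illustrated in isolation. The snippet below mirrors the new check (a standalone sketch, not code from this commit) to show what an unsupported mode produces.

```python
VALID_MODES = {"keyword", "vector", "hybrid"}


def validate_search_mode(search_mode: str | None) -> None:
    # Mirrors the check added in openai_search_vector_store: any value outside
    # the supported set is rejected before the search is routed to a provider.
    if search_mode not in VALID_MODES:
        raise ValueError(f"search_mode must be one of {VALID_MODES}, got {search_mode}")


validate_search_mode("hybrid")  # accepted
try:
    validate_search_mode("semantic")  # not a supported mode
except ValueError as err:
    print(err)
```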
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
         file_id=file.id,
     )
     assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+    """Test OpenAI vector store search with different search modes.
+
+    This test is skipped because the client library
+    needs to be regenerated from the updated OpenAPI spec to support the
+    search_mode parameter. Once the client library is updated, this test
+    can be enabled to verify:
+    - vector search mode (default)
+    - keyword search mode
+    - hybrid search mode
+    - invalid search mode validation
+    """
+    # TODO: Enable this test once llama_stack_client is updated to support search_mode
+    # The server-side implementation is complete but the client
+    # library needs to be updated:
+    # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+    pass
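Once llama_stack_client is regenerated, the skipped test could be filled in roughly along the lines of the sketch below. The `vector_stores.create(...)` and `vector_stores.search(..., search_mode=...)` client calls are assumptions about the regenerated client surface, and the exception type surfaced for an invalid mode may differ.

```python
import pytest


@pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"])
def test_openai_vector_store_search_modes_sketch(compat_client_with_empty_stores, search_mode):
    """Hypothetical shape of the test once the client exposes search_mode."""
    client = compat_client_with_empty_stores
    store = client.vector_stores.create(name="search_mode_demo")  # assumed client call
    page = client.vector_stores.search(
        vector_store_id=store.id,
        query="sample query",
        search_mode=search_mode,  # parameter added by this PR on the server side
    )
    assert page is not None


def test_openai_vector_store_invalid_search_mode_sketch(compat_client_with_empty_stores):
    client = compat_client_with_empty_stores
    store = client.vector_stores.create(name="search_mode_demo")  # assumed client call
    # The server raises ValueError for unknown modes; the client is expected to
    # surface that as an API error of some kind.
    with pytest.raises(Exception):
        client.vector_stores.search(
            vector_store_id=store.id,
            query="sample query",
            search_mode="not-a-mode",
        )
```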