Merge branch 'main' into chunk-metadata

2025-12-17 09:52:36 +00:00 · 2025-06-25 12:57:50 -06:00 · 2025-06-25 12:57:50 -06:00 · f52eb51555
commit f52eb51555
parent 7ed916dbb3 fa0b0c13d4
40 changed files with 272 additions and 722 deletions
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -16,20 +16,14 @@ from llama_stack.apis.files import Files
 from llama_stack.apis.files.files import OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
+    Chunk,
    QueryChunksResponse,
    SearchRankingOptions,
-    VectorStoreContent,
-    VectorStoreDeleteResponse,
-    VectorStoreListResponse,
-    VectorStoreObject,
-    VectorStoreSearchResponse,
-    VectorStoreSearchResponsePage,
-)
-from llama_stack.apis.vector_io.vector_io import (
-    Chunk,
    VectorStoreChunkingStrategy,
    VectorStoreChunkingStrategyAuto,
    VectorStoreChunkingStrategyStatic,
+    VectorStoreContent,
+    VectorStoreDeleteResponse,
    VectorStoreFileContentsResponse,
    VectorStoreFileCounts,
    VectorStoreFileDeleteResponse,
@@ -37,6 +31,10 @@ from llama_stack.apis.vector_io.vector_io import (
    VectorStoreFileObject,
    VectorStoreFileStatus,
    VectorStoreListFilesResponse,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks

@@ -339,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
    ) -> VectorStoreSearchResponsePage:
        """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
        max_num_results = max_num_results or 10

+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
        if vector_store_id not in self.openai_vector_stores:
            raise ValueError(f"Vector store {vector_store_id} not found")

--- a/llama_stack/providers/utils/scheduler.py
+++ b/llama_stack/providers/utils/scheduler.py
@@ -11,7 +11,7 @@ import threading
 from collections.abc import Callable, Coroutine, Iterable
 from datetime import UTC, datetime
 from enum import Enum
-from typing import Any, TypeAlias
+from typing import Any

 from pydantic import BaseModel

@@ -30,8 +30,8 @@ class JobStatus(Enum):
    completed = "completed"


-JobID: TypeAlias = str
-JobType: TypeAlias = str
+type JobID = str
+type JobType = str


 class JobArtifact(BaseModel):
@@ -47,7 +47,7 @@ JobHandler = Callable[
 ]


-LogMessage: TypeAlias = tuple[datetime, str]
+type LogMessage = tuple[datetime, str]


 _COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed}
--- a/llama_stack/providers/utils/tools/mcp.py
+++ b/llama_stack/providers/utils/tools/mcp.py
@@ -5,15 +5,7 @@
 # the root directory of this source tree.

 from contextlib import asynccontextmanager
-from typing import Any
-
-try:
-    # for python < 3.11
-    import exceptiongroup
-
-    BaseExceptionGroup = exceptiongroup.BaseExceptionGroup
-except ImportError:
-    pass
+from typing import Any, cast

 import httpx
 from mcp import ClientSession
@@ -40,14 +32,14 @@ async def sse_client_wrapper(endpoint: str, headers: dict[str, str]):
            async with ClientSession(*streams) as session:
                await session.initialize()
                yield session
-    except BaseException as e:
-        if isinstance(e, BaseExceptionGroup):
-            for exc in e.exceptions:
-                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
-                    raise AuthenticationRequiredError(exc) from exc
-        elif isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
-            raise AuthenticationRequiredError(e) from e
-
+    except* httpx.HTTPStatusError as eg:
+        for exc in eg.exceptions:
+            # mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter,
+            # so we explicitly cast each item to httpx.HTTPStatusError. NOTE(review): this assumes a flat
+            # group; `except*` preserves nesting, so `eg.exceptions` may also hold nested ExceptionGroups.
+            err = cast(httpx.HTTPStatusError, exc)
+            if err.response.status_code == 401:
+                raise AuthenticationRequiredError(exc) from exc
        raise