mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-20 19:56:59 +00:00
Merge branch 'main' into chunk-metadata
This commit is contained in:
commit
f52eb51555
40 changed files with 272 additions and 722 deletions
|
@ -16,20 +16,14 @@ from llama_stack.apis.files import Files
|
|||
from llama_stack.apis.files.files import OpenAIFileObject
|
||||
from llama_stack.apis.vector_dbs import VectorDB
|
||||
from llama_stack.apis.vector_io import (
|
||||
Chunk,
|
||||
QueryChunksResponse,
|
||||
SearchRankingOptions,
|
||||
VectorStoreContent,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponse,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
from llama_stack.apis.vector_io.vector_io import (
|
||||
Chunk,
|
||||
VectorStoreChunkingStrategy,
|
||||
VectorStoreChunkingStrategyAuto,
|
||||
VectorStoreChunkingStrategyStatic,
|
||||
VectorStoreContent,
|
||||
VectorStoreDeleteResponse,
|
||||
VectorStoreFileContentsResponse,
|
||||
VectorStoreFileCounts,
|
||||
VectorStoreFileDeleteResponse,
|
||||
|
@ -37,6 +31,10 @@ from llama_stack.apis.vector_io.vector_io import (
|
|||
VectorStoreFileObject,
|
||||
VectorStoreFileStatus,
|
||||
VectorStoreListFilesResponse,
|
||||
VectorStoreListResponse,
|
||||
VectorStoreObject,
|
||||
VectorStoreSearchResponse,
|
||||
VectorStoreSearchResponsePage,
|
||||
)
|
||||
from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
|
||||
|
||||
|
@ -339,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
|
|||
max_num_results: int | None = 10,
|
||||
ranking_options: SearchRankingOptions | None = None,
|
||||
rewrite_query: bool | None = False,
|
||||
# search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
|
||||
search_mode: str | None = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
|
||||
) -> VectorStoreSearchResponsePage:
|
||||
"""Search for chunks in a vector store."""
|
||||
# TODO: Add support in the API for this
|
||||
search_mode = "vector"
|
||||
max_num_results = max_num_results or 10
|
||||
|
||||
# Validate search_mode
|
||||
valid_modes = {"keyword", "vector", "hybrid"}
|
||||
if search_mode not in valid_modes:
|
||||
raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
|
||||
|
||||
if vector_store_id not in self.openai_vector_stores:
|
||||
raise ValueError(f"Vector store {vector_store_id} not found")
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ import threading
|
|||
from collections.abc import Callable, Coroutine, Iterable
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from typing import Any, TypeAlias
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
@ -30,8 +30,8 @@ class JobStatus(Enum):
|
|||
completed = "completed"
|
||||
|
||||
|
||||
JobID: TypeAlias = str
|
||||
JobType: TypeAlias = str
|
||||
type JobID = str
|
||||
type JobType = str
|
||||
|
||||
|
||||
class JobArtifact(BaseModel):
|
||||
|
@ -47,7 +47,7 @@ JobHandler = Callable[
|
|||
]
|
||||
|
||||
|
||||
LogMessage: TypeAlias = tuple[datetime, str]
|
||||
type LogMessage = tuple[datetime, str]
|
||||
|
||||
|
||||
_COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed}
|
||||
|
|
|
@ -5,15 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
# for python < 3.11
|
||||
import exceptiongroup
|
||||
|
||||
BaseExceptionGroup = exceptiongroup.BaseExceptionGroup
|
||||
except ImportError:
|
||||
pass
|
||||
from typing import Any, cast
|
||||
|
||||
import httpx
|
||||
from mcp import ClientSession
|
||||
|
@ -40,14 +32,14 @@ async def sse_client_wrapper(endpoint: str, headers: dict[str, str]):
|
|||
async with ClientSession(*streams) as session:
|
||||
await session.initialize()
|
||||
yield session
|
||||
except BaseException as e:
|
||||
if isinstance(e, BaseExceptionGroup):
|
||||
for exc in e.exceptions:
|
||||
if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
|
||||
raise AuthenticationRequiredError(exc) from exc
|
||||
elif isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
|
||||
raise AuthenticationRequiredError(e) from e
|
||||
|
||||
except* httpx.HTTPStatusError as eg:
|
||||
for exc in eg.exceptions:
|
||||
# mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter,
|
||||
# so we explicitly cast each item to httpx.HTTPStatusError. This is safe because
|
||||
# `except* httpx.HTTPStatusError` guarantees all exceptions in `eg.exceptions` are of that type.
|
||||
err = cast(httpx.HTTPStatusError, exc)
|
||||
if err.response.status_code == 401:
|
||||
raise AuthenticationRequiredError(exc) from exc
|
||||
raise
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue