Merge branch 'main' into chunk-metadata

2025-07-21 12:09:40 +00:00 · 2025-06-25 12:57:50 -06:00 · 2025-06-25 12:57:50 -06:00 · f52eb51555
commit f52eb51555
parent 7ed916dbb3 fa0b0c13d4
40 changed files with 272 additions and 722 deletions
--- a/llama_stack/providers/remote/inference/ollama/init.py
+++ b/llama_stack/providers/remote/inference/ollama/init.py
@ -10,6 +10,6 @@ from .config import OllamaImplConfig
 async def get_adapter_impl(config: OllamaImplConfig, _deps):
    from .ollama import OllamaInferenceAdapter

-    impl = OllamaInferenceAdapter(config.url)
+    impl = OllamaInferenceAdapter(config)
    await impl.initialize()
    return impl
--- a/llama_stack/providers/remote/inference/ollama/config.py
+++ b/llama_stack/providers/remote/inference/ollama/config.py
@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"

 class OllamaImplConfig(BaseModel):
    url: str = DEFAULT_OLLAMA_URL
+    raise_on_connect_error: bool = True

    @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]:
-        return {"url": url}
+    def sample_run_config(
+        cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "raise_on_connect_error": raise_on_connect_error,
+        }
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@ -9,7 +9,6 @@ import uuid
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

-import httpx
 from ollama import AsyncClient  # type: ignore[attr-defined]
 from openai import AsyncOpenAI

@ -57,6 +56,7 @@ from llama_stack.providers.datatypes import (
    HealthStatus,
    ModelsProtocolPrivate,
 )
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.model_registry import (
    ModelRegistryHelper,
 )
@ -90,9 +90,10 @@ class OllamaInferenceAdapter(
    InferenceProvider,
    ModelsProtocolPrivate,
 ):
-    def __init__(self, url: str) -> None:
+    def __init__(self, config: OllamaImplConfig) -> None:
        self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
-        self.url = url
+        self.url = config.url
+        self.raise_on_connect_error = config.raise_on_connect_error

    @property
    def client(self) -> AsyncClient:
@ -103,8 +104,13 @@ class OllamaInferenceAdapter(
        return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama")

    async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.url}`...")
-        await self.health()
+        logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
+        health_response = await self.health()
+        if health_response["status"] == HealthStatus.ERROR:
+            if self.raise_on_connect_error:
+                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            else:
+                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")

    async def health(self) -> HealthResponse:
        """
@ -117,10 +123,8 @@ class OllamaInferenceAdapter(
        try:
            await self.client.ps()
            return HealthResponse(status=HealthStatus.OK)
-        except httpx.ConnectError as e:
-            raise RuntimeError(
-                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
-            ) from e
+        except Exception as e:
+            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

    async def shutdown(self) -> None:
        pass
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any

 import httpx
-from openai import AsyncOpenAI
+from openai import APIConnectionError, AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@ -461,7 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
            model = await self.register_helper.register_model(model)
        except ValueError:
            pass  # Ignore statically unknown model, will check live listing
-        res = await client.models.list()
+        try:
+            res = await client.models.list()
+        except APIConnectionError as e:
+            raise ValueError(
+                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
+            ) from e
        available_models = [m.id async for m in res]
        if model.provider_resource_id not in available_models:
            raise ValueError(
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@ -19,17 +19,16 @@ from llama_stack.apis.vector_io import (
    QueryChunksResponse,
    SearchRankingOptions,
    VectorIO,
+    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@ -257,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")

@ -272,6 +272,11 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
    ) -> VectorStoreListFilesResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")

--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@ -21,17 +21,16 @@ from llama_stack.apis.vector_io import (
    QueryChunksResponse,
    SearchRankingOptions,
    VectorIO,
+    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@ -255,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
    ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

    async def openai_attach_file_to_vector_store(
        self,
@ -270,6 +270,11 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
    ) -> VectorStoreListFilesResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@ -19,17 +19,16 @@ from llama_stack.apis.vector_io import (
    QueryChunksResponse,
    SearchRankingOptions,
    VectorIO,
+    VectorStoreChunkingStrategy,
    VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
    VectorStoreListResponse,
    VectorStoreObject,
    VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@ -257,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        max_num_results: int | None = 10,
        ranking_options: SearchRankingOptions | None = None,
        rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
    ) -> VectorStoreSearchResponsePage:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")

@ -272,6 +272,11 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
    ) -> VectorStoreListFilesResponse:
        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")