Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-16 09:58:10 +00:00)

Merge branch 'main' into chunk-metadata
Commit f52eb51555
40 changed files with 272 additions and 722 deletions
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
 from enum import Enum
-from typing import Annotated, Any, Literal
+from typing import Annotated, Any, Literal, Protocol
 
 from pydantic import BaseModel, Field, field_validator
-from typing_extensions import Protocol, runtime_checkable
+from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
@@ -5,10 +5,10 @@
 # the root directory of this source tree.
 
 from enum import Enum
-from typing import Any, Literal
+from typing import Any, Literal, Protocol
 
 from pydantic import BaseModel, Field
-from typing_extensions import Protocol, runtime_checkable
+from typing_extensions import runtime_checkable
 
 from llama_stack.apis.common.content_types import URL, InterleavedContent
 from llama_stack.apis.resource import Resource, ResourceType
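Both hunks above make the same import move: Protocol now comes from the standard typing module while runtime_checkable is still taken from typing_extensions. A minimal standalone sketch of that split (assuming Python 3.8+ and the typing_extensions package; the class names here are illustrative, not from the repo):

    from typing import Protocol

    from typing_extensions import runtime_checkable

    @runtime_checkable
    class SupportsClose(Protocol):
        def close(self) -> None: ...

    class TempFile:
        def close(self) -> None:
            print("closed")

    # runtime_checkable allows isinstance() checks against the protocol
    print(isinstance(TempFile(), SupportsClose))  # True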
@@ -432,6 +432,7 @@ class VectorIO(Protocol):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store.

@@ -443,6 +444,7 @@ class VectorIO(Protocol):
         :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
         :param ranking_options: Ranking options for fine-tuning the search results.
         :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
         :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
         :returns: A VectorStoreSearchResponse containing the search results.
         """
         ...
@@ -101,7 +101,7 @@ def build_image(
     template_or_config: str,
     run_config: str | None = None,
 ):
-    container_base = build_config.distribution_spec.container_image or "python:3.11-slim"
+    container_base = build_config.distribution_spec.container_image or "python:3.12-slim"
 
     normal_deps, special_deps = get_provider_dependencies(build_config)
     normal_deps += SERVER_DEPENDENCIES
@@ -49,7 +49,7 @@ ensure_conda_env_python310() {
   local env_name="$1"
   local pip_dependencies="$2"
   local special_pip_deps="$3"
-  local python_version="3.11"
+  local python_version="3.12"
 
   # Check if conda command is available
   if ! is_command_available conda; then
@@ -81,19 +81,19 @@ ensure_conda_env_python310() {
   eval "$(conda shell.bash hook)"
   conda deactivate && conda activate "${env_name}"
 
-  $CONDA_PREFIX/bin/pip install uv
+  "$CONDA_PREFIX"/bin/pip install uv
 
   if [ -n "$TEST_PYPI_VERSION" ]; then
     # these packages are damaged in test-pypi, so install them first
    uv pip install fastapi libcst
     uv pip install --extra-index-url https://test.pypi.org/simple/ \
-      llama-stack==$TEST_PYPI_VERSION \
-      $pip_dependencies
+      llama-stack=="$TEST_PYPI_VERSION" \
+      "$pip_dependencies"
     if [ -n "$special_pip_deps" ]; then
       IFS='#' read -ra parts <<<"$special_pip_deps"
       for part in "${parts[@]}"; do
         echo "$part"
-        uv pip install $part
+        uv pip install "$part"
       done
     fi
   else
@@ -113,7 +113,7 @@ ensure_conda_env_python310() {
     else
       SPEC_VERSION="llama-stack"
     fi
-    uv pip install --no-cache-dir $SPEC_VERSION
+    uv pip install --no-cache-dir "$SPEC_VERSION"
   fi
 
   if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then
@@ -138,7 +138,7 @@ ensure_conda_env_python310() {
     fi
   fi
 
-  mv $build_file_path $CONDA_PREFIX/llamastack-build.yaml
+  mv "$build_file_path" "$CONDA_PREFIX"/llamastack-build.yaml
   echo "Build spec configuration saved at $CONDA_PREFIX/llamastack-build.yaml"
 }
 
@@ -84,7 +84,7 @@ add_to_container() {
 }
 
 # Check if container command is available
-if ! is_command_available $CONTAINER_BINARY; then
+if ! is_command_available "$CONTAINER_BINARY"; then
   printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2
   exit 1
 fi
@@ -95,13 +95,13 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then
 FROM $container_base
 WORKDIR /app
 
-# We install the Python 3.11 dev headers and build tools so that any
+# We install the Python 3.12 dev headers and build tools so that any
 # C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully.
 
 RUN dnf -y update && dnf install -y iputils git net-tools wget \
-    vim-minimal python3.11 python3.11-pip python3.11-wheel \
-    python3.11-setuptools python3.11-devel gcc make && \
-    ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all
+    vim-minimal python3.12 python3.12-pip python3.12-wheel \
+    python3.12-setuptools python3.12-devel gcc make && \
+    ln -s /bin/pip3.12 /bin/pip && ln -s /bin/python3.12 /bin/python && dnf clean all
 
 ENV UV_SYSTEM_PYTHON=1
 RUN pip install uv
@@ -163,9 +163,6 @@ EOF
 if [ -n "$run_config" ]; then
   # Copy the run config to the build context since it's an absolute path
   cp "$run_config" "$BUILD_CONTEXT_DIR/run.yaml"
-  add_to_container << EOF
-COPY run.yaml $RUN_CONFIG_PATH
-EOF
 
   # Parse the run.yaml configuration to identify external provider directories
   # If external providers are specified, copy their directory to the container
@@ -173,12 +170,15 @@ EOF
   python_cmd=$(get_python_cmd)
   external_providers_dir=$($python_cmd -c "import yaml; config = yaml.safe_load(open('$run_config')); print(config.get('external_providers_dir') or '')")
   external_providers_dir=$(eval echo "$external_providers_dir")
-  if [ -n "$external_providers_dir" ] && [ -d "$external_providers_dir" ]; then
+  if [ -n "$external_providers_dir" ]; then
+    if [ -d "$external_providers_dir" ]; then
     echo "Copying external providers directory: $external_providers_dir"
     cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
     add_to_container << EOF
 COPY providers.d /.llama/providers.d
 EOF
     fi
 
     # Edit the run.yaml file to change the external_providers_dir to /.llama/providers.d
     if [ "$(uname)" = "Darwin" ]; then
       sed -i.bak -e 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
@@ -187,6 +187,11 @@ EOF
       sed -i 's|external_providers_dir:.*|external_providers_dir: /.llama/providers.d|' "$BUILD_CONTEXT_DIR/run.yaml"
     fi
   fi
 
+  # Copy run config into docker image
+  add_to_container << EOF
+COPY run.yaml $RUN_CONFIG_PATH
+EOF
 fi
 
 stack_mount="/app/llama-stack-source"
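The inline python in the two hunks above just pulls external_providers_dir out of run.yaml before the container build. A rough standalone equivalent — the file path is only an example, and ~/$VAR expansion stands in for the script's eval echo — might look like:

    import os

    import yaml  # the same module the build script's inline python relies on

    def read_external_providers_dir(run_config_path: str) -> str:
        with open(run_config_path) as f:
            config = yaml.safe_load(f)
        value = config.get("external_providers_dir") or ""
        # approximate the script's `eval echo` by expanding ~ and $VARS
        return os.path.expandvars(os.path.expanduser(value))

    print(read_external_providers_dir("run.yaml"))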
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
         # Route based on vector store ID
@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
             max_num_results=max_num_results,
             ranking_options=ranking_options,
             rewrite_query=rewrite_query,
             search_mode=search_mode,
         )
 
     async def openai_attach_file_to_vector_store(
@@ -98,6 +98,15 @@ async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
 
         method = getattr(impls[api], register_method)
         for obj in objects:
+            # In complex templates, like our starter template, we may have dynamic model ids
+            # given by environment variables. This allows those environment variables to have
+            # a default value of __disabled__ to skip registration of the model if not set.
+            if (
+                hasattr(obj, "provider_model_id")
+                and obj.provider_model_id is not None
+                and "__disabled__" in obj.provider_model_id
+            ):
+                continue
             # we want to maintain the type information in arguments to method.
             # instead of method(**obj.model_dump()), which may convert a typed attr to a dict,
             # we use model_dump() to find all the attrs and then getattr to get the still typed value.
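Stripped of the surrounding registration loop, the new skip rule above boils down to a predicate on provider_model_id. A self-contained sketch (class and function names here are illustrative, not from the repo):

    class ModelToRegister:
        def __init__(self, provider_model_id: str | None):
            self.provider_model_id = provider_model_id

    def should_register(obj) -> bool:
        # skip anything whose provider_model_id still carries the __disabled__ placeholder,
        # i.e. the corresponding environment variable was never set
        return not (
            hasattr(obj, "provider_model_id")
            and obj.provider_model_id is not None
            and "__disabled__" in obj.provider_model_id
        )

    print(should_register(ModelToRegister("llama3.1:8b")))   # True
    print(should_register(ModelToRegister("__disabled__")))  # False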
@@ -10,6 +10,6 @@ from .config import OllamaImplConfig
 async def get_adapter_impl(config: OllamaImplConfig, _deps):
     from .ollama import OllamaInferenceAdapter
 
-    impl = OllamaInferenceAdapter(config.url)
+    impl = OllamaInferenceAdapter(config)
     await impl.initialize()
     return impl
@@ -13,7 +13,13 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
 
 class OllamaImplConfig(BaseModel):
     url: str = DEFAULT_OLLAMA_URL
+    raise_on_connect_error: bool = True
 
     @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs) -> dict[str, Any]:
-        return {"url": url}
+    def sample_run_config(
+        cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error: bool = True, **kwargs
+    ) -> dict[str, Any]:
+        return {
+            "url": url,
+            "raise_on_connect_error": raise_on_connect_error,
+        }
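Since the whole class is visible in the hunk above, a quick usage sketch is easy to ground; the class is mirrored here only so the snippet is self-contained (pydantic v2 assumed, as elsewhere in the project):

    from pydantic import BaseModel

    DEFAULT_OLLAMA_URL = "http://localhost:11434"

    class OllamaImplConfig(BaseModel):
        url: str = DEFAULT_OLLAMA_URL
        raise_on_connect_error: bool = True

    # opt out of hard failures when the Ollama server is unreachable
    cfg = OllamaImplConfig(raise_on_connect_error=False)
    print(cfg.model_dump())  # {'url': 'http://localhost:11434', 'raise_on_connect_error': False}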
@@ -9,7 +9,6 @@ import uuid
 from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any
 
-import httpx
 from ollama import AsyncClient  # type: ignore[attr-defined]
 from openai import AsyncOpenAI
 
@@ -57,6 +56,7 @@ from llama_stack.providers.datatypes import (
     HealthStatus,
     ModelsProtocolPrivate,
 )
+from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )
@@ -90,9 +90,10 @@ class OllamaInferenceAdapter(
     InferenceProvider,
     ModelsProtocolPrivate,
 ):
-    def __init__(self, url: str) -> None:
+    def __init__(self, config: OllamaImplConfig) -> None:
         self.register_helper = ModelRegistryHelper(MODEL_ENTRIES)
-        self.url = url
+        self.url = config.url
+        self.raise_on_connect_error = config.raise_on_connect_error
 
     @property
     def client(self) -> AsyncClient:
@@ -103,8 +104,13 @@ class OllamaInferenceAdapter(
         return AsyncOpenAI(base_url=f"{self.url}/v1", api_key="ollama")
 
     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.url}`...")
-        await self.health()
+        logger.debug(f"checking connectivity to Ollama at `{self.url}`...")
+        health_response = await self.health()
+        if health_response["status"] == HealthStatus.ERROR:
+            if self.raise_on_connect_error:
+                raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            else:
+                logger.warning("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
 
     async def health(self) -> HealthResponse:
         """
@@ -117,10 +123,8 @@ class OllamaInferenceAdapter(
         try:
             await self.client.ps()
             return HealthResponse(status=HealthStatus.OK)
-        except httpx.ConnectError as e:
-            raise RuntimeError(
-                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
-            ) from e
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
 
     async def shutdown(self) -> None:
         pass
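The behavioural change in the Ollama adapter hunks above is small: a failed health check now either raises or only warns, depending on the new flag. A minimal standalone sketch of that decision (the function name and the bare logging setup are illustrative only):

    import logging

    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger("ollama-adapter-sketch")

    def handle_unreachable_ollama(raise_on_connect_error: bool) -> None:
        msg = "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
        if raise_on_connect_error:
            raise RuntimeError(msg)
        logger.warning(msg)

    handle_unreachable_ollama(raise_on_connect_error=False)   # logs a warning and continues
    # handle_unreachable_ollama(raise_on_connect_error=True)  # would raise RuntimeError instead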
@@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator, AsyncIterator
 from typing import Any
 
 import httpx
-from openai import AsyncOpenAI
+from openai import APIConnectionError, AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -461,7 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
             model = await self.register_helper.register_model(model)
         except ValueError:
             pass  # Ignore statically unknown model, will check live listing
-        res = await client.models.list()
+        try:
+            res = await client.models.list()
+        except APIConnectionError as e:
+            raise ValueError(
+                f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
+            ) from e
         available_models = [m.id async for m in res]
         if model.provider_resource_id not in available_models:
             raise ValueError(
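The same wrap-and-reraise pattern can be sketched in isolation; here the built-in ConnectionError stands in for openai.APIConnectionError so the snippet needs no third-party imports, and the client/url names are placeholders rather than the adapter's real attributes:

    async def list_models_or_fail(client, url: str):
        try:
            return await client.models.list()
        except ConnectionError as e:
            # surface a user-actionable error instead of the raw connection failure
            raise ValueError(
                f"Failed to connect to vLLM at {url}. Please check if vLLM is running and accessible at that URL."
            ) from e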
@@ -19,17 +19,16 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
+    VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
     VectorStoreListResponse,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@@ -257,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
 
@@ -272,6 +272,11 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
     ) -> VectorStoreListFilesResponse:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
 
@@ -21,17 +21,16 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
+    VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
     VectorStoreListResponse,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.milvus import MilvusVectorIOConfig as InlineMilvusVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@@ -255,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")
 
     async def openai_attach_file_to_vector_store(
         self,
@@ -270,6 +270,11 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
     ) -> VectorStoreListFilesResponse:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")
 
@@ -19,17 +19,16 @@ from llama_stack.apis.vector_io import (
     QueryChunksResponse,
     SearchRankingOptions,
     VectorIO,
+    VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
+    VectorStoreFileContentsResponse,
+    VectorStoreFileObject,
+    VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
     VectorStoreListResponse,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
-from llama_stack.apis.vector_io.vector_io import (
-    VectorStoreChunkingStrategy,
-    VectorStoreFileContentsResponse,
-    VectorStoreFileObject,
-    VectorStoreListFilesResponse,
-)
 from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate
 from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig
 from llama_stack.providers.utils.memory.vector_store import (
@@ -257,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
         search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
 
@@ -272,6 +272,11 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
+        limit: int | None = 20,
+        order: str | None = "desc",
+        after: str | None = None,
+        before: str | None = None,
+        filter: VectorStoreFileStatus | None = None,
     ) -> VectorStoreListFilesResponse:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
 
@@ -16,20 +16,14 @@ from llama_stack.apis.files import Files
 from llama_stack.apis.files.files import OpenAIFileObject
 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import (
-    Chunk,
     QueryChunksResponse,
     SearchRankingOptions,
-    VectorStoreContent,
-    VectorStoreDeleteResponse,
-    VectorStoreListResponse,
-    VectorStoreObject,
-    VectorStoreSearchResponse,
-    VectorStoreSearchResponsePage,
 )
 from llama_stack.apis.vector_io.vector_io import (
+    Chunk,
     VectorStoreChunkingStrategy,
     VectorStoreChunkingStrategyAuto,
     VectorStoreChunkingStrategyStatic,
+    VectorStoreContent,
+    VectorStoreDeleteResponse,
     VectorStoreFileContentsResponse,
     VectorStoreFileCounts,
     VectorStoreFileDeleteResponse,
@@ -37,6 +31,10 @@ from llama_stack.apis.vector_io.vector_io import (
     VectorStoreFileObject,
     VectorStoreFileStatus,
     VectorStoreListFilesResponse,
+    VectorStoreListResponse,
+    VectorStoreObject,
+    VectorStoreSearchResponse,
+    VectorStoreSearchResponsePage,
 )
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, make_overlapped_chunks
 
@@ -339,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
         max_num_results = max_num_results or 10
 
+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")
 
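The validation added above is self-contained enough to lift out; a tiny runnable sketch (the function name is illustrative, not from the repo):

    def validate_search_mode(search_mode: str | None = "vector") -> str:
        valid_modes = {"keyword", "vector", "hybrid"}
        if search_mode not in valid_modes:
            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
        return search_mode

    print(validate_search_mode("hybrid"))  # hybrid
    # validate_search_mode("fuzzy")        # would raise ValueError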
@@ -11,7 +11,7 @@ import threading
 from collections.abc import Callable, Coroutine, Iterable
 from datetime import UTC, datetime
 from enum import Enum
-from typing import Any, TypeAlias
+from typing import Any
 
 from pydantic import BaseModel
 
@@ -30,8 +30,8 @@ class JobStatus(Enum):
     completed = "completed"
 
 
-JobID: TypeAlias = str
-JobType: TypeAlias = str
+type JobID = str
+type JobType = str
 
 
 class JobArtifact(BaseModel):
@@ -47,7 +47,7 @@ JobHandler = Callable[
 ]
 
 
-LogMessage: TypeAlias = tuple[datetime, str]
+type LogMessage = tuple[datetime, str]
 
 
 _COMPLETED_STATUSES = {JobStatus.completed, JobStatus.failed}
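The scheduler hunks above swap TypeAlias annotations for PEP 695 type statements, which is why the typing import shrinks. A small sketch of the before/after spelling (requires Python 3.12+, in line with the version bump elsewhere in this commit; names other than LogMessage are illustrative):

    from datetime import datetime
    from typing import TypeAlias

    LogMessageOld: TypeAlias = tuple[datetime, str]  # pre-3.12 spelling
    type LogMessage = tuple[datetime, str]           # 3.12+ spelling used by the new code

    def log(msg: LogMessage) -> None:
        ts, text = msg
        print(f"{ts.isoformat()} {text}")

    log((datetime.now(), "job started"))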
@@ -5,15 +5,7 @@
 # the root directory of this source tree.
 
 from contextlib import asynccontextmanager
-from typing import Any
-
-try:
-    # for python < 3.11
-    import exceptiongroup
-
-    BaseExceptionGroup = exceptiongroup.BaseExceptionGroup
-except ImportError:
-    pass
+from typing import Any, cast
 
 import httpx
 from mcp import ClientSession
@@ -40,14 +32,14 @@ async def sse_client_wrapper(endpoint: str, headers: dict[str, str]):
             async with ClientSession(*streams) as session:
                 await session.initialize()
                 yield session
-    except BaseException as e:
-        if isinstance(e, BaseExceptionGroup):
-            for exc in e.exceptions:
-                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 401:
-                    raise AuthenticationRequiredError(exc) from exc
-        elif isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
-            raise AuthenticationRequiredError(e) from e
-
+    except* httpx.HTTPStatusError as eg:
+        for exc in eg.exceptions:
+            # mypy does not currently narrow the type of `eg.exceptions` based on the `except*` filter,
+            # so we explicitly cast each item to httpx.HTTPStatusError. This is safe because
+            # `except* httpx.HTTPStatusError` guarantees all exceptions in `eg.exceptions` are of that type.
+            err = cast(httpx.HTTPStatusError, exc)
+            if err.response.status_code == 401:
+                raise AuthenticationRequiredError(exc) from exc
         raise
 
 
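The rewrite above leans on Python 3.11's except* syntax instead of manually probing BaseExceptionGroup, which is why the exceptiongroup backport shim could be dropped. A standard-library-only illustration of the construct (ValueError stands in for httpx.HTTPStatusError so the snippet runs without extra packages):

    def run_tasks() -> None:
        # pretend two concurrent tasks failed and were collected into a group
        raise ExceptionGroup("demo", [ValueError("401"), ValueError("500")])

    try:
        run_tasks()
    except* ValueError as eg:
        # eg.exceptions only contains the members matching the except* filter
        for exc in eg.exceptions:
            print("handled:", exc)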
@@ -18,6 +18,7 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
+      raise_on_connect_error: true
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -18,6 +18,7 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
+      raise_on_connect_error: true
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -31,6 +31,7 @@ providers:
     provider_type: remote::ollama
     config:
       url: ${env.OLLAMA_URL:http://localhost:11434}
+      raise_on_connect_error: false
   - provider_id: anthropic
     provider_type: remote::anthropic
     config:
@@ -60,7 +61,14 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
   vector_io:
-  - provider_id: sqlite-vec
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      kvstore:
+        type: sqlite
+        namespace: null
+        db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/faiss_store.db
+  - provider_id: ${env.ENABLE_SQLITE_VEC+sqlite-vec}
     provider_type: inline::sqlite-vec
     config:
       db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/starter}/sqlite_vec.db
@@ -530,160 +538,15 @@ models:
   provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8
   model_type: llm
 - metadata: {}
-  model_id: ollama/llama3.1:8b-instruct-fp16
+  model_id: ollama/${env.OLLAMA_INFERENCE_MODEL:__disabled__}
   provider_id: ollama
-  provider_model_id: llama3.1:8b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.1-8B-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.1:8b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.1:8b
-  provider_id: ollama
-  provider_model_id: llama3.1:8b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.1:70b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.1:70b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.1-70B-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.1:70b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.1:70b
-  provider_id: ollama
-  provider_model_id: llama3.1:70b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.1:405b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.1:405b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.1-405B-Instruct-FP8
-  provider_id: ollama
-  provider_model_id: llama3.1:405b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.1:405b
-  provider_id: ollama
-  provider_model_id: llama3.1:405b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2:1b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.2:1b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.2-1B-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.2:1b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2:1b
-  provider_id: ollama
-  provider_model_id: llama3.2:1b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2:3b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.2:3b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.2-3B-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.2:3b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2:3b
-  provider_id: ollama
-  provider_model_id: llama3.2:3b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2-vision:11b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:11b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.2-11B-Vision-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:11b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2-vision:latest
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:latest
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2-vision:90b-instruct-fp16
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:90b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.2-90B-Vision-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:90b-instruct-fp16
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.2-vision:90b
-  provider_id: ollama
-  provider_model_id: llama3.2-vision:90b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama3.3:70b
-  provider_id: ollama
-  provider_model_id: llama3.3:70b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-3.3-70B-Instruct
-  provider_id: ollama
-  provider_model_id: llama3.3:70b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama-guard3:8b
-  provider_id: ollama
-  provider_model_id: llama-guard3:8b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-Guard-3-8B
-  provider_id: ollama
-  provider_model_id: llama-guard3:8b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/llama-guard3:1b
-  provider_id: ollama
-  provider_model_id: llama-guard3:1b
-  model_type: llm
-- metadata: {}
-  model_id: ollama/meta-llama/Llama-Guard-3-1B
-  provider_id: ollama
-  provider_model_id: llama-guard3:1b
+  provider_model_id: ${env.OLLAMA_INFERENCE_MODEL:__disabled__}
   model_type: llm
 - metadata:
-    embedding_dimension: 384
-    context_length: 512
-  model_id: ollama/all-minilm:latest
+    embedding_dimension: ${env.OLLAMA_EMBEDDING_DIMENSION:384}
+  model_id: ollama/${env.OLLAMA_EMBEDDING_MODEL:__disabled__}
   provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
-- metadata:
-    embedding_dimension: 384
-    context_length: 512
-  model_id: ollama/all-minilm
-  provider_id: ollama
-  provider_model_id: all-minilm:latest
-  model_type: embedding
-- metadata:
-    embedding_dimension: 768
-    context_length: 8192
-  model_id: ollama/nomic-embed-text
-  provider_id: ollama
-  provider_model_id: nomic-embed-text
+  provider_model_id: ${env.OLLAMA_EMBEDDING_MODEL:__disabled__}
   model_type: embedding
 - metadata: {}
   model_id: anthropic/claude-3-5-sonnet-latest
@@ -938,6 +801,11 @@ models:
   provider_id: sambanova
   provider_model_id: sambanova/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: vllm/${env.VLLM_INFERENCE_MODEL:__disabled__}
+  provider_id: vllm
+  provider_model_id: ${env.VLLM_INFERENCE_MODEL:__disabled__}
+  model_type: llm
 - metadata:
     embedding_dimension: 384
   model_id: all-MiniLM-L6-v2
@@ -16,6 +16,7 @@ from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplCo
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import (
     SQLiteVectorIOConfig,
 )
@@ -36,9 +37,6 @@ from llama_stack.providers.remote.inference.groq.models import (
     MODEL_ENTRIES as GROQ_MODEL_ENTRIES,
 )
 from llama_stack.providers.remote.inference.ollama.config import OllamaImplConfig
-from llama_stack.providers.remote.inference.ollama.models import (
-    MODEL_ENTRIES as OLLAMA_MODEL_ENTRIES,
-)
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import (
     MODEL_ENTRIES as OPENAI_MODEL_ENTRIES,
@@ -85,8 +83,22 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
         ),
         (
             "ollama",
-            OLLAMA_MODEL_ENTRIES,
-            OllamaImplConfig.sample_run_config(),
+            [
+                ProviderModelEntry(
+                    provider_model_id="${env.OLLAMA_INFERENCE_MODEL:__disabled__}",
+                    model_type=ModelType.llm,
+                ),
+                ProviderModelEntry(
+                    provider_model_id="${env.OLLAMA_EMBEDDING_MODEL:__disabled__}",
+                    model_type=ModelType.embedding,
+                    metadata={
+                        "embedding_dimension": "${env.OLLAMA_EMBEDDING_DIMENSION:384}",
+                    },
+                ),
+            ],
+            OllamaImplConfig.sample_run_config(
+                url="${env.OLLAMA_URL:http://localhost:11434}", raise_on_connect_error=False
+            ),
         ),
         (
             "anthropic",
@@ -110,7 +122,12 @@ def get_inference_providers() -> tuple[list[Provider], dict[str, list[ProviderMo
         ),
         (
             "vllm",
-            [],
+            [
+                ProviderModelEntry(
+                    provider_model_id="${env.VLLM_INFERENCE_MODEL:__disabled__}",
+                    model_type=ModelType.llm,
+                ),
+            ],
             VLLMInferenceAdapterConfig.sample_run_config(
                 url="${env.VLLM_URL:http://localhost:8000/v1}",
             ),
@@ -153,7 +170,12 @@ def get_distribution_template() -> DistributionTemplate:
 
     vector_io_providers = [
         Provider(
-            provider_id="sqlite-vec",
+            provider_id="faiss",
+            provider_type="inline::faiss",
+            config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+        ),
+        Provider(
+            provider_id="${env.ENABLE_SQLITE_VEC+sqlite-vec}",
             provider_type="inline::sqlite-vec",
             config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
         ),
@@ -257,7 +279,27 @@ def get_distribution_template() -> DistributionTemplate:
             ),
             "VLLM_URL": (
                 "http://localhost:8000/v1",
-                "VLLM URL",
+                "vLLM URL",
             ),
+            "VLLM_INFERENCE_MODEL": (
+                "",
+                "Optional vLLM Inference Model to register on startup",
+            ),
+            "OLLAMA_URL": (
+                "http://localhost:11434",
+                "Ollama URL",
+            ),
+            "OLLAMA_INFERENCE_MODEL": (
+                "",
+                "Optional Ollama Inference Model to register on startup",
+            ),
+            "OLLAMA_EMBEDDING_MODEL": (
+                "",
+                "Optional Ollama Embedding Model to register on startup",
+            ),
+            "OLLAMA_EMBEDDING_DIMENSION": (
+                "384",
+                "Ollama Embedding Dimension",
+            ),
         },
     )
@@ -56,6 +56,9 @@ export function usePagination<T>({
   const stateRef = useRef(state);
   stateRef.current = state;
 
+  // Track if initial data has been fetched
+  const hasFetchedInitialData = useRef(false);
+
   /**
    * Fetches data from the API with cursor-based pagination
    */
@@ -119,8 +122,11 @@ export function usePagination<T>({
 
   // Auto-load initial data on mount
   useEffect(() => {
-    fetchData();
-  }, []);
+    if (!hasFetchedInitialData.current) {
+      hasFetchedInitialData.current = true;
+      fetchData();
+    }
+  }, [fetchData]);
 
   return {
     data: state.data,