Merge branch 'main' into eval_api_final

2025-03-26 12:29:45 -07:00 · 2025-03-26 12:29:45 -07:00 · bc0cd07008
commit bc0cd07008
parent 7f12ea290f cb2a9784ab
79 changed files with 3257 additions and 2358 deletions
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@ -6,14 +6,12 @@

 import copy
 import json
-import os
 import re
 import secrets
 import string
 import uuid
 from datetime import datetime, timezone
 from typing import AsyncGenerator, List, Optional, Union
-from urllib.parse import urlparse

 import httpx

@ -60,7 +58,6 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import (
-    RAGDocument,
    ToolGroups,
    ToolInvocationResult,
    ToolRuntime,
@ -453,8 +450,16 @@ class ChatAgent(ShieldRunnerMixin):
        stream: bool = False,
        documents: Optional[List[Document]] = None,
    ) -> AsyncGenerator:
+        # if document is passed in a turn, we parse the raw text of the document
+        # and sent it as a user message
        if documents:
-            await self.handle_documents(session_id, documents, input_messages)
+            contexts = []
+            for document in documents:
+                raw_document_text = await get_raw_document_text(document)
+                contexts.append(raw_document_text)
+
+            attached_context = "\n".join(contexts)
+            input_messages[-1].context = attached_context

        session_info = await self.storage.get_session_info(session_id)
        # if the session has a memory bank id, let the memory tool use it
@ -829,7 +834,10 @@ class ChatAgent(ShieldRunnerMixin):
                    )
                    tool_name_to_args[tool_def.identifier] = toolgroup_to_args.get(toolgroup_name, {})

-        self.tool_defs, self.tool_name_to_args = list(tool_name_to_def.values()), tool_name_to_args
+        self.tool_defs, self.tool_name_to_args = (
+            list(tool_name_to_def.values()),
+            tool_name_to_args,
+        )

    def _parse_toolgroup_name(self, toolgroup_name_with_maybe_tool_name: str) -> tuple[str, Optional[str]]:
        """Parse a toolgroup name into its components.
@ -880,144 +888,27 @@ class ChatAgent(ShieldRunnerMixin):
        logger.debug(f"tool call {tool_name_str} completed with result: {result}")
        return result

-    async def handle_documents(
-        self,
-        session_id: str,
-        documents: List[Document],
-        input_messages: List[Message],
-    ) -> None:
-        memory_tool = any(tool_def.tool_name == MEMORY_QUERY_TOOL for tool_def in self.tool_defs)
-        code_interpreter_tool = any(tool_def.tool_name == BuiltinTool.code_interpreter for tool_def in self.tool_defs)
-        content_items = []
-        url_items = []
-        pattern = re.compile("^(https?://|file://|data:)")
-        for d in documents:
-            if isinstance(d.content, URL):
-                url_items.append(d.content)
-            elif pattern.match(d.content):
-                url_items.append(URL(uri=d.content))
-            else:
-                content_items.append(d)

-        # Save the contents to a tempdir and use its path as a URL if code interpreter is present
-        if code_interpreter_tool:
-            for c in content_items:
-                temp_file_path = os.path.join(self.tempdir, f"{make_random_string()}.txt")
-                with open(temp_file_path, "w") as temp_file:
-                    temp_file.write(c.content)
-                url_items.append(URL(uri=f"file://{temp_file_path}"))
-
-        if memory_tool and code_interpreter_tool:
-            # if both memory and code_interpreter are available, we download the URLs
-            # and attach the data to the last message.
-            await attachment_message(self.tempdir, url_items, input_messages[-1])
-            # Since memory is present, add all the data to the memory bank
-            await self.add_to_session_vector_db(session_id, documents)
-        elif code_interpreter_tool:
-            # if only code_interpreter is available, we download the URLs to a tempdir
-            # and attach the path to them as a message to inference with the
-            # assumption that the model invokes the code_interpreter tool with the path
-            await attachment_message(self.tempdir, url_items, input_messages[-1])
-        elif memory_tool:
-            # if only memory is available, we load the data from the URLs and content items to the memory bank
-            await self.add_to_session_vector_db(session_id, documents)
-        else:
-            # if no memory or code_interpreter tool is available,
-            # we try to load the data from the URLs and content items as a message to inference
-            # and add it to the last message's context
-            input_messages[-1].context = "\n".join(
-                [doc.content for doc in content_items] + await load_data_from_urls(url_items)
-            )
-
-    async def _ensure_vector_db(self, session_id: str) -> str:
-        session_info = await self.storage.get_session_info(session_id)
-        if session_info is None:
-            raise ValueError(f"Session {session_id} not found")
-
-        if session_info.vector_db_id is None:
-            vector_db_id = f"vector_db_{session_id}"
-
-            # TODO: the semantic for registration is definitely not "creation"
-            # so we need to fix it if we expect the agent to create a new vector db
-            # for each session
-            await self.vector_io_api.register_vector_db(
-                vector_db_id=vector_db_id,
-                embedding_model="all-MiniLM-L6-v2",
-            )
-            await self.storage.add_vector_db_to_session(session_id, vector_db_id)
-        else:
-            vector_db_id = session_info.vector_db_id
-
-        return vector_db_id
-
-    async def add_to_session_vector_db(self, session_id: str, data: List[Document]) -> None:
-        vector_db_id = await self._ensure_vector_db(session_id)
-        documents = [
-            RAGDocument(
-                document_id=str(uuid.uuid4()),
-                content=a.content,
-                mime_type=a.mime_type,
-                metadata={},
-            )
-            for a in data
-        ]
-        await self.tool_runtime_api.rag_tool.insert(
-            documents=documents,
-            vector_db_id=vector_db_id,
-            chunk_size_in_tokens=512,
-        )
+async def load_data_from_url(url: str) -> str:
+    if url.startswith("http"):
+        async with httpx.AsyncClient() as client:
+            r = await client.get(url)
+            resp = r.text
+            return resp
+    raise ValueError(f"Unexpected URL: {type(url)}")


-async def load_data_from_urls(urls: List[URL]) -> List[str]:
-    data = []
-    for url in urls:
-        uri = url.uri
-        if uri.startswith("file://"):
-            filepath = uri[len("file://") :]
-            with open(filepath, "r") as f:
-                data.append(f.read())
-        elif uri.startswith("http"):
-            async with httpx.AsyncClient() as client:
-                r = await client.get(uri)
-                resp = r.text
-                data.append(resp)
-    return data
-
-
-async def attachment_message(tempdir: str, urls: List[URL], message: UserMessage) -> None:
-    contents = []
-
-    for url in urls:
-        uri = url.uri
-        if uri.startswith("file://"):
-            filepath = uri[len("file://") :]
-        elif uri.startswith("http"):
-            path = urlparse(uri).path
-            basename = os.path.basename(path)
-            filepath = f"{tempdir}/{make_random_string() + basename}"
-            logger.info(f"Downloading {url} -> {filepath}")
-
-            async with httpx.AsyncClient() as client:
-                r = await client.get(uri)
-                resp = r.text
-                with open(filepath, "w") as fp:
-                    fp.write(resp)
-        else:
-            raise ValueError(f"Unsupported URL {url}")
-
-        contents.append(
-            TextContentItem(
-                text=f'# User provided a file accessible to you at "{filepath}"\nYou can use code_interpreter to load and inspect it.'
-            )
-        )
-
-    if isinstance(message.content, list):
-        message.content.extend(contents)
+async def get_raw_document_text(document: Document) -> str:
+    if not document.mime_type.startswith("text/"):
+        raise ValueError(f"Unexpected document mime type: {document.mime_type}")
+    if isinstance(document.content, URL):
+        return await load_data_from_url(document.content.uri)
+    elif isinstance(document.content, str):
+        return document.content
+    elif isinstance(document.content, TextContentItem):
+        return document.content.text
    else:
-        if isinstance(message.content, str):
-            message.content = [TextContentItem(text=message.content)] + contents
-        else:
-            message.content = [message.content] + contents
+        raise ValueError(f"Unexpected document content type: {type(document.content)}")


 def _interpret_content_as_attachment(
--- a/llama_stack/providers/inline/telemetry/meta_reference/config.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py
@ -28,6 +28,11 @@ class TelemetryConfig(BaseModel):
        default="http://localhost:4318/v1/metrics",
        description="The OpenTelemetry collector endpoint URL for metrics",
    )
+    service_name: str = Field(
+        # service name is always the same, use zero-width space to avoid clutter
+        default="",
+        description="The service name to use for telemetry",
+    )
    sinks: List[TelemetrySink] = Field(
        default=[TelemetrySink.CONSOLE, TelemetrySink.SQLITE],
        description="List of telemetry sinks to enable (possible values: otel, sqlite, console)",
@ -47,6 +52,7 @@ class TelemetryConfig(BaseModel):
    @classmethod
    def sample_run_config(cls, __distro_dir__: str, db_name: str = "trace_store.db") -> Dict[str, Any]:
        return {
+            "service_name": "${env.OTEL_SERVICE_NAME:}",
            "sinks": "${env.TELEMETRY_SINKS:console,sqlite}",
            "sqlite_db_path": "${env.SQLITE_DB_PATH:" + __distro_dir__ + "/" + db_name + "}",
        }
--- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
+++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py
@ -67,8 +67,7 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):

        resource = Resource.create(
            {
-                # service name is always the same, use zero-width space to avoid clutter
-                ResourceAttributes.SERVICE_NAME: "",
+                ResourceAttributes.SERVICE_NAME: self.config.service_name,
            }
        )

--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import hashlib
 import logging
 import sqlite3
@ -29,6 +30,15 @@ def serialize_vector(vector: List[float]) -> bytes:
    return struct.pack(f"{len(vector)}f", *vector)


+def _create_sqlite_connection(db_path):
+    """Create a SQLite connection with sqlite_vec extension loaded."""
+    connection = sqlite3.connect(db_path)
+    connection.enable_load_extension(True)
+    sqlite_vec.load(connection)
+    connection.enable_load_extension(False)
+    return connection
+
+
 class SQLiteVecIndex(EmbeddingIndex):
    """
    An index implementation that stores embeddings in a SQLite virtual table using sqlite-vec.
@ -37,40 +47,56 @@ class SQLiteVecIndex(EmbeddingIndex):
      - A virtual table (vec_chunks_{bank_id}) that holds the serialized vector.
    """

-    def __init__(self, dimension: int, connection: sqlite3.Connection, bank_id: str):
+    def __init__(self, dimension: int, db_path: str, bank_id: str):
        self.dimension = dimension
-        self.connection = connection
+        self.db_path = db_path
        self.bank_id = bank_id
        self.metadata_table = f"chunks_{bank_id}".replace("-", "_")
        self.vector_table = f"vec_chunks_{bank_id}".replace("-", "_")

    @classmethod
-    async def create(cls, dimension: int, connection: sqlite3.Connection, bank_id: str):
-        instance = cls(dimension, connection, bank_id)
+    async def create(cls, dimension: int, db_path: str, bank_id: str):
+        instance = cls(dimension, db_path, bank_id)
        await instance.initialize()
        return instance

    async def initialize(self) -> None:
-        cur = self.connection.cursor()
-        # Create the table to store chunk metadata.
-        cur.execute(f"""
-            CREATE TABLE IF NOT EXISTS {self.metadata_table} (
-                id TEXT PRIMARY KEY,
-                chunk TEXT
-            );
-        """)
-        # Create the virtual table for embeddings.
-        cur.execute(f"""
-            CREATE VIRTUAL TABLE IF NOT EXISTS {self.vector_table}
-            USING vec0(embedding FLOAT[{self.dimension}], id TEXT);
-        """)
-        self.connection.commit()
+        def _init_tables():
+            connection = _create_sqlite_connection(self.db_path)
+            cur = connection.cursor()
+            try:
+                # Create the table to store chunk metadata.
+                cur.execute(f"""
+                    CREATE TABLE IF NOT EXISTS {self.metadata_table} (
+                        id TEXT PRIMARY KEY,
+                        chunk TEXT
+                    );
+                """)
+                # Create the virtual table for embeddings.
+                cur.execute(f"""
+                    CREATE VIRTUAL TABLE IF NOT EXISTS {self.vector_table}
+                    USING vec0(embedding FLOAT[{self.dimension}], id TEXT);
+                """)
+                connection.commit()
+            finally:
+                cur.close()
+                connection.close()

-    async def delete(self):
-        cur = self.connection.cursor()
-        cur.execute(f"DROP TABLE IF EXISTS {self.metadata_table};")
-        cur.execute(f"DROP TABLE IF EXISTS {self.vector_table};")
-        self.connection.commit()
+        await asyncio.to_thread(_init_tables)
+
+    async def delete(self) -> None:
+        def _drop_tables():
+            connection = _create_sqlite_connection(self.db_path)
+            cur = connection.cursor()
+            try:
+                cur.execute(f"DROP TABLE IF EXISTS {self.metadata_table};")
+                cur.execute(f"DROP TABLE IF EXISTS {self.vector_table};")
+                connection.commit()
+            finally:
+                cur.close()
+                connection.close()
+
+        await asyncio.to_thread(_drop_tables)

    async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray, batch_size: int = 500):
        """
@ -81,44 +107,55 @@ class SQLiteVecIndex(EmbeddingIndex):
        """
        assert all(isinstance(chunk.content, str) for chunk in chunks), "SQLiteVecIndex only supports text chunks"

-        cur = self.connection.cursor()
-        try:
-            # Start transaction
-            cur.execute("BEGIN TRANSACTION")
-            for i in range(0, len(chunks), batch_size):
-                batch_chunks = chunks[i : i + batch_size]
-                batch_embeddings = embeddings[i : i + batch_size]
-                # Prepare metadata inserts
-                metadata_data = [
-                    (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json())
-                    for chunk in batch_chunks
-                    if isinstance(chunk.content, str)
-                ]
-                # Insert metadata (ON CONFLICT to avoid duplicates)
-                cur.executemany(
-                    f"""
-                    INSERT INTO {self.metadata_table} (id, chunk)
-                    VALUES (?, ?)
-                    ON CONFLICT(id) DO UPDATE SET chunk = excluded.chunk;
-                    """,
-                    metadata_data,
-                )
-                # Prepare embeddings inserts
-                embedding_data = [
-                    (generate_chunk_id(chunk.metadata["document_id"], chunk.content), serialize_vector(emb.tolist()))
-                    for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
-                    if isinstance(chunk.content, str)
-                ]
-                # Insert embeddings in batch
-                cur.executemany(f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);", embedding_data)
-            self.connection.commit()
+        def _execute_all_batch_inserts():
+            connection = _create_sqlite_connection(self.db_path)
+            cur = connection.cursor()

-        except sqlite3.Error as e:
-            self.connection.rollback()  # Rollback on failure
-            logger.error(f"Error inserting into {self.vector_table}: {e}")
+            try:
+                # Start transaction a single transcation for all batches
+                cur.execute("BEGIN TRANSACTION")
+                for i in range(0, len(chunks), batch_size):
+                    batch_chunks = chunks[i : i + batch_size]
+                    batch_embeddings = embeddings[i : i + batch_size]
+                    # Prepare metadata inserts
+                    metadata_data = [
+                        (generate_chunk_id(chunk.metadata["document_id"], chunk.content), chunk.model_dump_json())
+                        for chunk in batch_chunks
+                        if isinstance(chunk.content, str)
+                    ]
+                    # Insert metadata (ON CONFLICT to avoid duplicates)
+                    cur.executemany(
+                        f"""
+                        INSERT INTO {self.metadata_table} (id, chunk)
+                        VALUES (?, ?)
+                        ON CONFLICT(id) DO UPDATE SET chunk = excluded.chunk;
+                        """,
+                        metadata_data,
+                    )
+                    # Prepare embeddings inserts
+                    embedding_data = [
+                        (
+                            generate_chunk_id(chunk.metadata["document_id"], chunk.content),
+                            serialize_vector(emb.tolist()),
+                        )
+                        for chunk, emb in zip(batch_chunks, batch_embeddings, strict=True)
+                        if isinstance(chunk.content, str)
+                    ]
+                    # Insert embeddings in batch
+                    cur.executemany(f"INSERT INTO {self.vector_table} (id, embedding) VALUES (?, ?);", embedding_data)
+                connection.commit()

-        finally:
-            cur.close()  # Ensure cursor is closed
+            except sqlite3.Error as e:
+                connection.rollback()  # Rollback on failure
+                logger.error(f"Error inserting into {self.vector_table}: {e}")
+                raise
+
+            finally:
+                cur.close()
+                connection.close()
+
+        # Process all batches in a single thread
+        await asyncio.to_thread(_execute_all_batch_inserts)

    async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
        """
@ -127,18 +164,28 @@ class SQLiteVecIndex(EmbeddingIndex):
        """
        emb_list = embedding.tolist() if isinstance(embedding, np.ndarray) else list(embedding)
        emb_blob = serialize_vector(emb_list)
-        cur = self.connection.cursor()
-        query_sql = f"""
-            SELECT m.id, m.chunk, v.distance
-            FROM {self.vector_table} AS v
-            JOIN {self.metadata_table} AS m ON m.id = v.id
-            WHERE v.embedding MATCH ? AND k = ?
-            ORDER BY v.distance;
-        """
-        cur.execute(query_sql, (emb_blob, k))
-        rows = cur.fetchall()
-        chunks = []
-        scores = []
+
+        def _execute_query():
+            connection = _create_sqlite_connection(self.db_path)
+            cur = connection.cursor()
+
+            try:
+                query_sql = f"""
+                    SELECT m.id, m.chunk, v.distance
+                    FROM {self.vector_table} AS v
+                    JOIN {self.metadata_table} AS m ON m.id = v.id
+                    WHERE v.embedding MATCH ? AND k = ?
+                    ORDER BY v.distance;
+                """
+                cur.execute(query_sql, (emb_blob, k))
+                return cur.fetchall()
+            finally:
+                cur.close()
+                connection.close()
+
+        rows = await asyncio.to_thread(_execute_query)
+
+        chunks, scores = [], []
        for _id, chunk_json, distance in rows:
            try:
                chunk = Chunk.model_validate_json(chunk_json)
@ -163,63 +210,81 @@ class SQLiteVecVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
        self.config = config
        self.inference_api = inference_api
        self.cache: Dict[str, VectorDBWithIndex] = {}
-        self.connection: Optional[sqlite3.Connection] = None

    async def initialize(self) -> None:
-        # Open a connection to the SQLite database (the file is specified in the config).
-        self.connection = sqlite3.connect(self.config.db_path)
-        self.connection.enable_load_extension(True)
-        sqlite_vec.load(self.connection)
-        self.connection.enable_load_extension(False)
-        cur = self.connection.cursor()
-        # Create a table to persist vector DB registrations.
-        cur.execute("""
-            CREATE TABLE IF NOT EXISTS vector_dbs (
-                id TEXT PRIMARY KEY,
-                metadata TEXT
-            );
-        """)
-        self.connection.commit()
-        # Load any existing vector DB registrations.
-        cur.execute("SELECT metadata FROM vector_dbs")
-        rows = cur.fetchall()
+        def _setup_connection():
+            # Open a connection to the SQLite database (the file is specified in the config).
+            connection = _create_sqlite_connection(self.config.db_path)
+            cur = connection.cursor()
+            try:
+                # Create a table to persist vector DB registrations.
+                cur.execute("""
+                    CREATE TABLE IF NOT EXISTS vector_dbs (
+                        id TEXT PRIMARY KEY,
+                        metadata TEXT
+                    );
+                """)
+                connection.commit()
+                # Load any existing vector DB registrations.
+                cur.execute("SELECT metadata FROM vector_dbs")
+                rows = cur.fetchall()
+                return rows
+            finally:
+                cur.close()
+                connection.close()
+
+        rows = await asyncio.to_thread(_setup_connection)
        for row in rows:
            vector_db_data = row[0]
            vector_db = VectorDB.model_validate_json(vector_db_data)
-            index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier)
+            index = await SQLiteVecIndex.create(
+                vector_db.embedding_dimension, self.config.db_path, vector_db.identifier
+            )
            self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)

    async def shutdown(self) -> None:
-        if self.connection:
-            self.connection.close()
-            self.connection = None
+        # nothing to do since we don't maintain a persistent connection
+        pass

    async def register_vector_db(self, vector_db: VectorDB) -> None:
-        if self.connection is None:
-            raise RuntimeError("SQLite connection not initialized")
-        cur = self.connection.cursor()
-        cur.execute(
-            "INSERT OR REPLACE INTO vector_dbs (id, metadata) VALUES (?, ?)",
-            (vector_db.identifier, vector_db.model_dump_json()),
-        )
-        self.connection.commit()
-        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.connection, vector_db.identifier)
+        def _register_db():
+            connection = _create_sqlite_connection(self.config.db_path)
+            cur = connection.cursor()
+            try:
+                cur.execute(
+                    "INSERT OR REPLACE INTO vector_dbs (id, metadata) VALUES (?, ?)",
+                    (vector_db.identifier, vector_db.model_dump_json()),
+                )
+                connection.commit()
+            finally:
+                cur.close()
+                connection.close()
+
+        await asyncio.to_thread(_register_db)
+        index = await SQLiteVecIndex.create(vector_db.embedding_dimension, self.config.db_path, vector_db.identifier)
        self.cache[vector_db.identifier] = VectorDBWithIndex(vector_db, index, self.inference_api)

    async def list_vector_dbs(self) -> List[VectorDB]:
        return [v.vector_db for v in self.cache.values()]

    async def unregister_vector_db(self, vector_db_id: str) -> None:
-        if self.connection is None:
-            raise RuntimeError("SQLite connection not initialized")
        if vector_db_id not in self.cache:
            logger.warning(f"Vector DB {vector_db_id} not found")
            return
        await self.cache[vector_db_id].index.delete()
        del self.cache[vector_db_id]
-        cur = self.connection.cursor()
-        cur.execute("DELETE FROM vector_dbs WHERE id = ?", (vector_db_id,))
-        self.connection.commit()
+
+        def _delete_vector_db_from_registry():
+            connection = _create_sqlite_connection(self.config.db_path)
+            cur = connection.cursor()
+            try:
+                cur.execute("DELETE FROM vector_dbs WHERE id = ?", (vector_db_id,))
+                connection.commit()
+            finally:
+                cur.close()
+                connection.close()
+
+        await asyncio.to_thread(_delete_vector_db_from_registry)

    async def insert_chunks(self, vector_db_id: str, chunks: List[Chunk], ttl_seconds: Optional[int] = None) -> None:
        if vector_db_id not in self.cache: