diff --git a/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb b/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb index 0ac42c835..98d554372 100644 --- a/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb +++ b/docs/notebooks/crewai/Llama_Stack_CrewAI.ipynb @@ -13,14 +13,14 @@ "\n", "### Overview\n", "\n", - "- **LlamaStack**: Provides the infrastructure for running LLMs and vector databases.\n", + "- **LlamaStack**: Provides the infrastructure for running LLMs and vector stores.\n", "- **CrewAI**: Offers a framework for orchestrating agents and tasks.\n", "- **Integration**: Leverages LlamaStack's OpenAI-compatible API with CrewAI.\n", "\n", "### What You Will Learn\n", "\n", "1. How to set up and start the LlamaStack server using the Together AI provider.\n", - "2. How to create and manage vector databases within LlamaStack.\n", + "2. How to create and manage vector stores within LlamaStack.\n", "3. How to build RAG tool with CrewAI by utilizing the LlamaStack server.\n", "4. How to query the RAG tool for effective information retrieval and generation.\n", "\n", @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a", "metadata": { "colab": { @@ -59,7 +59,7 @@ ] }, { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ "TOGETHER_API_KEY environment variable is not set. 
Please enter your API key: ········\n" @@ -109,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e", "metadata": { "colab": { @@ -128,32 +128,32 @@ "Environment '/Users/kaiwu/work/kaiwu/llama-stack/.venv' already exists, re-using it.\n", "Virtual environment /Users/kaiwu/work/kaiwu/llama-stack/.venv is already active\n", "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 206ms\u001b[0m\u001b[0m\n", + "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 186ms\u001b[0m\u001b[0m\n", "Installing pip dependencies\n", "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 2.02s\u001b[0m\u001b[0m \u001b[0m\n", - "\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 247ms\u001b[0m\u001b[0m\n", - "\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 97ms\u001b[0m\u001b[0m \u001b[0m\n", + "\u001b[2K\u001b[2mResolved \u001b[1m185 packages\u001b[0m \u001b[2min 1.29s\u001b[0m\u001b[0m \u001b[0m\n", + "\u001b[2mUninstalled \u001b[1m3 packages\u001b[0m \u001b[2min 285ms\u001b[0m\u001b[0m\n", + "\u001b[2K\u001b[2mInstalled \u001b[1m3 packages\u001b[0m \u001b[2min 105ms\u001b[0m\u001b[0m \u001b[0m\n", " \u001b[31m-\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.5\u001b[0m\n", " \u001b[32m+\u001b[39m \u001b[1mprotobuf\u001b[0m\u001b[2m==5.29.4\u001b[0m\n", " \u001b[31m-\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.18.14\u001b[0m\n", " \u001b[32m+\u001b[39m \u001b[1mruamel-yaml\u001b[0m\u001b[2m==0.17.40\u001b[0m\n", " \u001b[31m-\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.12.5\u001b[0m\n", " \u001b[32m+\u001b[39m \u001b[1mruff\u001b[0m\u001b[2m==0.9.10\u001b[0m\n", - "Installing special provider module: torch torchtune>=0.5.0 
torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 169ms\u001b[0m\u001b[0m\n", - "Installing special provider module: sentence-transformers --no-deps\n", - "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 43ms\u001b[0m\u001b[0m\n", "Installing special provider module: torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n", "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", - "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 29ms\u001b[0m\u001b[0m\n", + "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 66ms\u001b[0m\u001b[0m\n", + "Installing special provider module: sentence-transformers --no-deps\n", + "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", + "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n", + "Installing special provider module: torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu\n", + "\u001b[2mUsing Python 3.12.9 environment at: /Users/kaiwu/work/kaiwu/llama-stack/.venv\u001b[0m\n", + "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 95ms\u001b[0m\u001b[0m\n", "\u001b[32mBuild Successful!\u001b[0m\n", "\u001b[34mYou can find the newly-built distribution here: /Users/kaiwu/.llama/distributions/starter/starter-run.yaml\u001b[0m\n", "\u001b[32mYou can run the new Llama Stack distro via: \u001b[34mllama stack run /Users/kaiwu/.llama/distributions/starter/starter-run.yaml --image-type venv\u001b[0m\u001b[0m\n", - "Starting Llama Stack server with PID: 97967\n", - "Waiting for server to start.................\n", + "Starting 
Llama Stack server with PID: 26783\n", + "Waiting for server to start..........\n", "Server is ready!\n" ] } @@ -231,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5", "metadata": { "id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5" @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61", "metadata": { "id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61", @@ -268,6 +268,13 @@ "scrolled": true }, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Available models:\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -280,7 +287,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Available models:\n", + "- llama_api/Llama-3.3-70B-Instruct\n", + "- llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8\n", + "- llama_api/Llama-4-Scout-17B-16E-Instruct-FP8\n", + "- meta-llama/Llama-3.3-70B-Instruct\n", + "- meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8\n", + "- meta-llama/Llama-4-Scout-17B-16E-Instruct-FP8\n", "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n", "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n", "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n", @@ -375,21 +387,21 @@ "id": "b0f28603-3207-4157-b731-638d93cd82b5" }, "source": [ - "### 4. Vector Database Setup\n", + "### 4. 
Vector Store Setup\n", "\n", - "#### Register a Vector Database\n", + "#### Create a Vector Store with File Upload\n", "\n", - "Create a FAISS vector database for storing document embeddings:\n", + "Create a vector store using the OpenAI-compatible vector stores API:\n", "\n", - "- **Vector DB ID**: Unique identifier for the database\n", - "- **Provider**: FAISS (Facebook AI Similarity Search)\n", + "- **Vector Store**: OpenAI-compatible vector store for document storage\n", + "- **File Upload**: Automatic chunking and embedding of uploaded files\n", "- **Embedding Model**: Sentence Transformers model for text embeddings\n", "- **Dimensions**: 384-dimensional embeddings" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "0f241d81-19a7-451f-ac4e-2869a29300d1", "metadata": { "id": "0f241d81-19a7-451f-ac4e-2869a29300d1", @@ -400,57 +412,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n", - "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/vector-dbs \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n", + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Registered new vector DB: VectorDBRegisterResponse(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', vector_db_name=None, owner=None, source='via_register_api')\n", - "Existing vector DBs: [VectorDBListResponseItem(embedding_dimension=384, embedding_model='sentence-transformers/all-MiniLM-L6-v2', identifier='acme_docs', provider_id='faiss', type='vector_db', provider_resource_id='acme_docs_v2', 
vector_db_name=None)]\n" + "File(id='file-489db9aae0424745960e3408ff0f477f', bytes=41, created_at=1757540912, expires_at=1789076912, filename='shipping_policy.txt', object='file', purpose='assistants')\n", + "File(id='file-b2f38b0e164347f5a2b6bbe211e33ff3', bytes=48, created_at=1757540912, expires_at=1789076912, filename='returns_policy.txt', object='file', purpose='assistants')\n", + "File(id='file-6f6f157d165a4078b4abef66a095ccd6', bytes=45, created_at=1757540912, expires_at=1789076912, filename='support.txt', object='file', purpose='assistants')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores \"HTTP/1.1 200 OK\"\n" ] } ], "source": [ - "# Register a new clean vector database\n", - "vector_db = client.vector_dbs.register(\n", - " vector_db_id=\"acme_docs\", # Use a new unique name\n", - " provider_id=\"faiss\",\n", - " provider_vector_db_id=\"acme_docs_v2\",\n", - " embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", - " embedding_dimension=384,\n", - ")\n", - "print(\"Registered new vector DB:\", vector_db)\n", - "\n", - "# List all registered vector databases\n", - "dbs = client.vector_dbs.list()\n", - "print(\"Existing vector DBs:\", dbs)" - ] - }, - { - "cell_type": "markdown", - "id": "pcgjqzfr3eo", - "metadata": { - "id": "pcgjqzfr3eo" - }, - "source": [ - "#### Prepare Sample Documents\n", - "\n", - "Create LLAMA Stack Chunks for FAISS vector store" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "5a0a6619-c9fb-4938-8ff3-f84304eed91e", - "metadata": { - "id": "5a0a6619-c9fb-4938-8ff3-f84304eed91e" - }, - "outputs": [], - "source": [ - "from llama_stack_client.types.vector_io_insert_params import Chunk\n", + "from io import BytesIO\n", "\n", "docs = [\n", " (\"Acme ships globally in 3-5 business days.\", {\"title\": \"Shipping Policy\"}),\n", @@ -458,62 +443,22 @@ " (\"Support is available 24/7 via chat and email.\", 
{\"title\": \"Support\"}),\n", "]\n", "\n", - "# Convert to Chunk objects\n", - "chunks = []\n", - "for _, (content, metadata) in enumerate(docs):\n", - " # Transform metadata to required format with document_id from title\n", - " metadata = {\"document_id\": metadata[\"title\"]}\n", - " chunk = Chunk(\n", - " content=content, # Required[InterleavedContent]\n", - " metadata=metadata, # Required[Dict]\n", - " )\n", - " chunks.append(chunk)" - ] - }, - { - "cell_type": "markdown", - "id": "6bg3sm2ko5g", - "metadata": { - "id": "6bg3sm2ko5g" - }, - "source": [ - "#### Insert Documents into Vector Database\n", + "file_ids = []\n", + "for content, metadata in docs:\n", + " with BytesIO(content.encode()) as file_buffer:\n", + " file_buffer.name = f\"{metadata['title'].replace(' ', '_').lower()}.txt\"\n", + " create_file_response = client.files.create(file=file_buffer, purpose=\"assistants\")\n", + " print(create_file_response)\n", + " file_ids.append(create_file_response.id)\n", "\n", - "Store the prepared documents in the FAISS vector database. This process:\n", - "1. Generates embeddings for each document\n", - "2. Stores embeddings with metadata\n", - "3. 
Enables semantic search capabilities" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0e8740d8-b809-44b9-915f-1e0200e3c3f1", - "metadata": { - "id": "0e8740d8-b809-44b9-915f-1e0200e3c3f1", - "outputId": "ebc275ae-54dd-4566-edb0-f3a067bfcf03" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/insert \"HTTP/1.1 200 OK\"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Documents inserted: None\n" - ] - } - ], - "source": [ - "# Insert chunks into FAISS vector store\n", - "\n", - "response = client.vector_io.insert(vector_db_id=\"acme_docs\", chunks=chunks)\n", - "print(\"Documents inserted:\", response)" + "# Create vector store with files\n", + "vector_store = client.vector_stores.create(\n", + " name=\"acme_docs\",\n", + " file_ids=file_ids,\n", + " embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", + " embedding_dimension=384,\n", + " provider_id=\"faiss\"\n", + ")" ] }, { @@ -525,12 +470,12 @@ "source": [ "#### Test Vector Search\n", "\n", - "Query the vector database to verify it's working correctly. This performs semantic search to find relevant documents based on the query." + "Query the vector store to verify it's working correctly. This performs semantic search to find relevant documents based on the query." 
] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "4a5e010c-eeeb-4020-a957-74d6d1cba342", "metadata": { "id": "4a5e010c-eeeb-4020-a957-74d6d1cba342", @@ -541,32 +486,27 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n" + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search \"HTTP/1.1 200 OK\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "metadata : {'document_id': 'Shipping Policy'}\n", - "content : Acme ships globally in 3-5 business days.\n", - "metadata : {'document_id': 'Returns Policy'}\n", - "content : Returns are accepted within 30 days of purchase.\n", - "metadata : {'document_id': 'Support'}\n", - "content : Support is available 24/7 via chat and email.\n" + "Acme ships globally in 3-5 business days.\n", + "Returns are accepted within 30 days of purchase.\n" ] } ], "source": [ - "# Query chunks from FAISS vector store\n", - "\n", - "query_chunk_response = client.vector_io.query(\n", - " vector_db_id=\"acme_docs\",\n", - " query=\"How long does Acme take to ship orders?\",\n", + "search_response = client.vector_stores.search(\n", + " vector_store_id=vector_store.id,\n", + " query=\"How long does shipping take?\",\n", + " max_num_results=2\n", ")\n", - "for chunk in query_chunk_response.chunks:\n", - " print(\"metadata\", \":\", chunk.metadata)\n", - " print(\"content\", \":\", chunk.content)" + "for result in search_response.data:\n", + " content = result.content[0].text\n", + " print(content)" ] }, { @@ -589,7 +529,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084", "metadata": { "id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084", @@ -631,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "f88ffb5a-657b-4916-9375-c6ddc156c25e", 
"metadata": { "id": "f88ffb5a-657b-4916-9375-c6ddc156c25e", @@ -642,22 +582,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[92m10:30:35 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "\u001b[92m14:49:56 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:LiteLLM:\n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:30:41 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", + "\u001b[92m14:50:01 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n" ] }, { "data": { "text/plain": [ - "\"In the Andes' gentle breeze, a llama's soft eyes gaze with peaceful ease, its fur a warm and fuzzy tease. With steps both gentle and serene, the llama roams, a symbol of calm, its beauty truly unseen.\"" + "\"In the Andes' gentle breeze, a llama's soft eyes gaze with peaceful ease, its fur a warm and fuzzy tease. With steps both gentle and serene, the llama roams, a symbol of calm, its beauty pure and supreme.\"" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -680,15 +620,15 @@ "source": [ "#### Create CrewAI Custom Tool\n", "\n", - "Define a custom CrewAI tool, `LlamaStackRAGTool`, to encapsulate the logic for querying the LlamaStack vector database. This tool will be used by the CrewAI agent to perform retrieval during the RAG process.\n", + "Define a custom CrewAI tool, `LlamaStackVectorStoreRAGTool`, to encapsulate the logic for querying the LlamaStack vector store. 
This tool will be used by the CrewAI agent to perform retrieval during the RAG process.\n", "\n", - "- **Input Schema**: Defines the expected input parameters for the tool, such as the user query, the vector database ID, and optional parameters like `top_k` and `score_threshold`.\n", - "- **Tool Logic**: Implements the `_run` method, which takes the user query and vector database ID, calls the LlamaStack client's `vector_io.query` method, and formats the retrieved documents into a human-readable string for the LLM to use as context." + "- **Input Schema**: Defines the expected input parameters for the tool, such as the user query, the vector store ID, and optional parameters like `top_k`.\n", + "- **Tool Logic**: Implements the `_run` method, which takes the user query and vector store ID, calls the LlamaStack client's `vector_stores.search` method, and formats the retrieved documents into a human-readable string for the LLM to use as context." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "08de540f-ed47-405a-a9c5-16505f4c88c8", "metadata": { "id": "08de540f-ed47-405a-a9c5-16505f4c88c8" @@ -700,11 +640,11 @@ "from pydantic import BaseModel, Field\n", "\n", "# ---------- 1. Input schema ----------\n", - "class RAGToolInput(BaseModel):\n", - " \"\"\"Input schema for LlamaStackRAGTool.\"\"\"\n", + "class VectorStoreRAGToolInput(BaseModel):\n", + " \"\"\"Input schema for LlamaStackVectorStoreRAGTool.\"\"\"\n", " query: str = Field(..., description=\"The user query for RAG search\")\n", - " vector_db_id: str = Field(...,\n", - " description=\"ID of the vector database to search inside the Llama-Stack server\",\n", + " vector_store_id: str = Field(...,\n", + " description=\"ID of the vector store to search inside the Llama-Stack server\",\n", " )\n", " top_k: Optional[int] = Field(\n", " default=5,\n", @@ -716,44 +656,41 @@ " )\n", "\n", "# ---------- 2. 
The tool ----------\n", - "class LlamaStackRAGTool(BaseTool):\n", - " name: str = \"Llama Stack RAG tool\"\n", + "class LlamaStackVectorStoreRAGTool(BaseTool):\n", + " name: str = \"Llama Stack Vector Store RAG tool\"\n", " description: str = (\n", - " \"This tool calls a Llama-Stack endpoint for retrieval-augmented generation. \"\n", + " \"This tool calls a Llama-Stack endpoint for retrieval-augmented generation using a vector store. \"\n", " \"It takes a natural-language query and returns the most relevant documents.\"\n", " )\n", - " args_schema: Type[BaseModel] = RAGToolInput\n", + " args_schema: Type[BaseModel] = VectorStoreRAGToolInput\n", " client: Any\n", - " vector_db_id: str = \"\"\n", + " vector_store_id: str = \"\"\n", " top_k: int = 5\n", - " score_threshold: Optional[float] = None\n", "\n", " def _run(self, **kwargs: Any) -> str:\n", " # 1. Resolve parameters (use instance defaults when not supplied)\n", - " print(kwargs)\n", " query: str = kwargs.get(\"query\") # Required – schema enforces presence\n", - " vector_db_id: str = kwargs.get(\"vector_db_id\", self.vector_db_id)\n", + " vector_store_id: str = kwargs.get(\"vector_store_id\", self.vector_store_id)\n", " top_k: int = kwargs.get(\"top_k\", self.top_k)\n", - " score_threshold: float | None = kwargs.get(\n", - " \"score_threshold\", self.score_threshold\n", - " )\n", - " if vector_db_id == \"\":\n", - " print('vector_db_id is empty, please specify which vector_db to search')\n", + " if vector_store_id == \"\":\n", + " print('vector_store_id is empty, please specify which vector_store to search')\n", " return \"No documents found.\"\n", " # 2. 
Issue request to Llama-Stack\n", - " response: List[dict] = self.client.vector_io.query(\n", - " vector_db_id=vector_db_id,\n", + " response = self.client.vector_stores.search(\n", + " vector_store_id=vector_store_id,\n", " query=query,\n", - " params={\"max_chunks\": top_k, \"score_threshold\": score_threshold}\n", + " max_num_results=top_k,\n", " )\n", "\n", " # 3. Massage results into a single human-readable string\n", - " if not response:\n", + " if not response or not response.data:\n", " return \"No documents found.\"\n", "\n", " docs: List[str] = []\n", - " for metadata,content in response:\n", - " docs.append(f\"metadata: {metadata}, content: {content}\")\n", + " for result in response.data:\n", + " content = result.content[0].text if result.content else \"No content\"\n", + " filename = result.filename if result.filename else {}\n", + " docs.append(f\"filename: {filename}, content: {content}\")\n", " return \"\\n\".join(docs)\n" ] }, @@ -771,7 +708,7 @@ "Construct a CrewAI pipeline that orchestrates the RAG process. This pipeline includes:\n", "\n", "1. **Agent Definition**: Defining a CrewAI agent with a specific role (`RAG assistant`), goal, backstory, and the LlamaStack LLM and the custom RAG tool.\n", - "2. **Task Definition**: Defining a CrewAI task for the agent to perform. The task description includes placeholders for the user query and vector database ID, which will be provided during execution. The task's expected output is an answer to the question based on the retrieved context.\n", + "2. **Task Definition**: Defining a CrewAI task for the agent to perform. The task description includes placeholders for the user query and vector store ID, which will be provided during execution. The task's expected output is an answer to the question based on the retrieved context.\n", "3. **Crew Definition**: Creating a CrewAI `Crew` object with the defined task and agent. 
This crew represents the complete RAG pipeline.\n", "\n", "**CrewAI workflow**:\n", @@ -780,7 +717,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "9684427d-dcc7-4544-9af5-8b110d014c42", "metadata": { "id": "9684427d-dcc7-4544-9af5-8b110d014c42" @@ -796,10 +733,10 @@ " backstory=\"You are an experienced search assistant specializing in finding relevant information from documentation and vector_db to answer user questions accurately.\",\n", " allow_delegation=False,\n", " llm=llamastack_llm,\n", - " tools=[LlamaStackRAGTool(client=client)])\n", + " tools=[LlamaStackVectorStoreRAGTool(client=client)])\n", "# ---- 4. Wrap everything in a Crew task ---------------------------\n", "task = Task(\n", - " description=\"Answer the following questions: {query}, using the RAG_tool to search the provided vector_db_id {vector_db_id} if needed\",\n", + " description=\"Answer the following questions: {query}, using the RAG_tool to search the provided vector_store_id {vector_store_id} if needed\",\n", " expected_output=\"An answer to the question with provided context\",\n", " agent=agent,\n", ")\n", @@ -820,7 +757,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "03322188-9509-446a-a4a8-ce3bb83ec87c", "metadata": { "colab": { @@ -839,7 +776,7 @@ " \n", " Crew Execution Started \n", " Name: crew \n", - " ID: 146be674-7521-4db6-bb70-287b22aa21aa \n", + " ID: 091cf919-5c4b-4168-ac49-65fe5e8faa9e \n", " Tool Args: \n", " \n", " \n", @@ -851,7 +788,7 @@ "\u001b[36m│\u001b[0m \u001b[36m│\u001b[0m\n", "\u001b[36m│\u001b[0m \u001b[1;36mCrew Execution Started\u001b[0m \u001b[36m│\u001b[0m\n", "\u001b[36m│\u001b[0m \u001b[37mName: \u001b[0m\u001b[36mcrew\u001b[0m \u001b[36m│\u001b[0m\n", - "\u001b[36m│\u001b[0m \u001b[37mID: \u001b[0m\u001b[36m146be674-7521-4db6-bb70-287b22aa21aa\u001b[0m \u001b[36m│\u001b[0m\n", + "\u001b[36m│\u001b[0m \u001b[37mID: \u001b[0m\u001b[36m091cf919-5c4b-4168-ac49-65fe5e8faa9e\u001b[0m 
\u001b[36m│\u001b[0m\n", "\u001b[36m│\u001b[0m \u001b[37mTool Args: \u001b[0m \u001b[36m│\u001b[0m\n", "\u001b[36m│\u001b[0m \u001b[36m│\u001b[0m\n", "\u001b[36m│\u001b[0m \u001b[36m│\u001b[0m\n", @@ -877,7 +814,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0fd1807480b443948be37ef922f4ad0f", + "model_id": "cb8f60c158fb4a0496e78e4d596ac4c8", "version_major": 2, "version_minor": 0 }, @@ -892,23 +829,25 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[92m10:31:29 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "\u001b[92m14:55:09 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:LiteLLM:\n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:31:30 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", + "\u001b[92m14:55:11 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n" ] }, { "data": { "text/html": [ - "
{'query': 'How long does shipping take?', 'vector_db_id': 'acme_docs', 'top_k': 5, 'score_threshold': 0.5}\n",
+       "
{'query': 'How long does shipping take?', 'vector_store_id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'top_k': 1,\n",
+       "'score_threshold': 0.0}\n",
        "
\n" ], "text/plain": [ - "{'query': 'How long does shipping take?', 'vector_db_id': 'acme_docs', 'top_k': 5, 'score_threshold': 0.5}\n" + "{'query': 'How long does shipping take?', 'vector_store_id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'top_k': 1,\n", + "'score_threshold': 0.0}\n" ] }, "metadata": {}, @@ -918,13 +857,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:31:31 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search \"HTTP/1.1 200 OK\"\n", + "\u001b[92m14:55:11 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:LiteLLM:\n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:31:33 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", + "\u001b[92m14:55:12 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n" ] }, @@ -944,7 +883,7 @@ "
╭──────────────────────────────────────────────── Task Completion ────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
        "  Task Completed                                                                                                 \n",
-       "  Name: b1362fb4-fe9b-479d-a01d-18bf38406780                                                                     \n",
+       "  Name: cf3f4f08-744c-4aee-9387-e9eb70624fc1                                                                     \n",
        "  Agent: RAG assistant                                                                                           \n",
        "  Tool Args:                                                                                                     \n",
        "                                                                                                                 \n",
@@ -956,7 +895,7 @@
        "\u001b[32m╭─\u001b[0m\u001b[32m───────────────────────────────────────────────\u001b[0m\u001b[32m Task Completion \u001b[0m\u001b[32m───────────────────────────────────────────────\u001b[0m\u001b[32m─╮\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[1;32mTask Completed\u001b[0m                                                                                                 \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mb1362fb4-fe9b-479d-a01d-18bf38406780\u001b[0m                                                                     \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mcf3f4f08-744c-4aee-9387-e9eb70624fc1\u001b[0m                                                                     \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mAgent: \u001b[0m\u001b[32mRAG assistant\u001b[0m                                                                                           \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mTool Args: \u001b[0m                                                                                                    \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
@@ -987,10 +926,9 @@
        "                                                                                                                 \n",
        "  Crew Execution Completed                                                                                       \n",
        "  Name: crew                                                                                                     \n",
-       "  ID: 146be674-7521-4db6-bb70-287b22aa21aa                                                                       \n",
+       "  ID: 091cf919-5c4b-4168-ac49-65fe5e8faa9e                                                                       \n",
        "  Tool Args:                                                                                                     \n",
-       "  Final Output: Acme ships globally in 3-5 business days. Returns are accepted within 30 days of purchase.       \n",
-       "  Support is available 24/7 via chat and email.                                                                  \n",
+       "  Final Output: Acme ships globally in 3-5 business days.                                                        \n",
        "                                                                                                                 \n",
        "                                                                                                                 \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
@@ -1001,10 +939,9 @@
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[1;32mCrew Execution Completed\u001b[0m                                                                                       \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mcrew\u001b[0m                                                                                                     \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[32m146be674-7521-4db6-bb70-287b22aa21aa\u001b[0m                                                                       \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[32m091cf919-5c4b-4168-ac49-65fe5e8faa9e\u001b[0m                                                                       \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mTool Args: \u001b[0m                                                                                                    \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mFinal Output: Acme ships globally in 3-5 business days. Returns are accepted within 30 days of purchase. \u001b[0m      \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mSupport is available 24/7 via chat and email.\u001b[0m                                                                  \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mFinal Output: Acme ships globally in 3-5 business days.\u001b[0m                                                        \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n"
@@ -1031,13 +968,13 @@
      "output_type": "stream",
      "text": [
       "❓ How long does shipping take?\n",
-      "💡 Acme ships globally in 3-5 business days. Returns are accepted within 30 days of purchase. Support is available 24/7 via chat and email.\n"
+      "💡 Acme ships globally in 3-5 business days.\n"
      ]
     }
    ],
    "source": [
     "query = \"How long does shipping take?\"\n",
-    "response = crew.kickoff(inputs={\"query\": query,\"vector_db_id\": \"acme_docs\"})\n",
+    "response = crew.kickoff(inputs={\"query\": query,\"vector_store_id\": vector_store.id})\n",
     "print(\"❓\", query)\n",
     "print(\"💡\", response)"
    ]
@@ -1054,7 +991,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 19,
    "id": "61995550-bb0b-46a8-a5d0-023207475d60",
    "metadata": {
     "colab": {
@@ -1073,7 +1010,7 @@
        "                                                                                                                 \n",
        "  Crew Execution Started                                                                                         \n",
        "  Name: crew                                                                                                     \n",
-       "  ID: 146be674-7521-4db6-bb70-287b22aa21aa                                                                       \n",
+       "  ID: 091cf919-5c4b-4168-ac49-65fe5e8faa9e                                                                       \n",
        "  Tool Args:                                                                                                     \n",
        "                                                                                                                 \n",
        "                                                                                                                 \n",
@@ -1085,7 +1022,7 @@
        "\u001b[36m│\u001b[0m                                                                                                                 \u001b[36m│\u001b[0m\n",
        "\u001b[36m│\u001b[0m  \u001b[1;36mCrew Execution Started\u001b[0m                                                                                         \u001b[36m│\u001b[0m\n",
        "\u001b[36m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[36mcrew\u001b[0m                                                                                                     \u001b[36m│\u001b[0m\n",
-       "\u001b[36m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[36m146be674-7521-4db6-bb70-287b22aa21aa\u001b[0m                                                                       \u001b[36m│\u001b[0m\n",
+       "\u001b[36m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[36m091cf919-5c4b-4168-ac49-65fe5e8faa9e\u001b[0m                                                                       \u001b[36m│\u001b[0m\n",
        "\u001b[36m│\u001b[0m  \u001b[37mTool Args: \u001b[0m                                                                                                    \u001b[36m│\u001b[0m\n",
        "\u001b[36m│\u001b[0m                                                                                                                 \u001b[36m│\u001b[0m\n",
        "\u001b[36m│\u001b[0m                                                                                                                 \u001b[36m│\u001b[0m\n",
@@ -1111,7 +1048,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e1e2dc3abcd745fa80b32ef32b264396",
+       "model_id": "60b83042bfc14a75b555537d13147372",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1126,23 +1063,25 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[92m10:31:38 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n",
+      "\u001b[92m14:55:19 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n",
       "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n",
       "INFO:LiteLLM:\n",
       "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n",
       "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
-      "\u001b[92m10:31:39 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n",
+      "\u001b[92m14:55:21 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n",
       "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n"
      ]
     },
     {
      "data": {
       "text/html": [
-       "
{'query': 'return policy after 40 days', 'vector_db_id': 'acme_docs', 'top_k': 1, 'score_threshold': 0.5}\n",
+       "
{'query': 'return policy after 40 days', 'vector_store_id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'top_k': 1, \n",
+       "'score_threshold': 0.5}\n",
        "
\n" ], "text/plain": [ - "{'query': 'return policy after 40 days', 'vector_db_id': 'acme_docs', 'top_k': 1, 'score_threshold': 0.5}\n" + "{'query': 'return policy after 40 days', 'vector_store_id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'top_k': 1, \n", + "'score_threshold': 0.5}\n" ] }, "metadata": {}, @@ -1152,13 +1091,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector-io/query \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:31:39 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", + "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search \"HTTP/1.1 200 OK\"\n", + "\u001b[92m14:55:22 - LiteLLM:INFO\u001b[0m: utils.py:3258 - \n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:LiteLLM:\n", "LiteLLM completion() model= together/meta-llama/Llama-3.3-70B-Instruct-Turbo; provider = openai\n", "INFO:httpx:HTTP Request: POST http://localhost:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "\u001b[92m10:31:41 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", + "\u001b[92m14:55:22 - LiteLLM:INFO\u001b[0m: utils.py:1260 - Wrapper: Completed Call, calling success_handler\n", "INFO:LiteLLM:Wrapper: Completed Call, calling success_handler\n" ] }, @@ -1178,7 +1117,7 @@ "
╭──────────────────────────────────────────────── Task Completion ────────────────────────────────────────────────╮\n",
        "                                                                                                                 \n",
        "  Task Completed                                                                                                 \n",
-       "  Name: b1362fb4-fe9b-479d-a01d-18bf38406780                                                                     \n",
+       "  Name: cf3f4f08-744c-4aee-9387-e9eb70624fc1                                                                     \n",
        "  Agent: RAG assistant                                                                                           \n",
        "  Tool Args:                                                                                                     \n",
        "                                                                                                                 \n",
@@ -1190,7 +1129,7 @@
        "\u001b[32m╭─\u001b[0m\u001b[32m───────────────────────────────────────────────\u001b[0m\u001b[32m Task Completion \u001b[0m\u001b[32m───────────────────────────────────────────────\u001b[0m\u001b[32m─╮\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[1;32mTask Completed\u001b[0m                                                                                                 \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mb1362fb4-fe9b-479d-a01d-18bf38406780\u001b[0m                                                                     \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mcf3f4f08-744c-4aee-9387-e9eb70624fc1\u001b[0m                                                                     \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mAgent: \u001b[0m\u001b[32mRAG assistant\u001b[0m                                                                                           \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mTool Args: \u001b[0m                                                                                                    \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
@@ -1221,10 +1160,9 @@
        "                                                                                                                 \n",
        "  Crew Execution Completed                                                                                       \n",
        "  Name: crew                                                                                                     \n",
-       "  ID: 146be674-7521-4db6-bb70-287b22aa21aa                                                                       \n",
+       "  ID: 091cf919-5c4b-4168-ac49-65fe5e8faa9e                                                                       \n",
        "  Tool Args:                                                                                                     \n",
-       "  Final Output: Returns are accepted within 30 days of purchase. Therefore, you cannot return a product after    \n",
-       "  40 days.                                                                                                       \n",
+       "  Final Output: Returns are accepted within 30 days of purchase.                                                 \n",
        "                                                                                                                 \n",
        "                                                                                                                 \n",
        "╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
@@ -1235,10 +1173,9 @@
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[1;32mCrew Execution Completed\u001b[0m                                                                                       \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mName: \u001b[0m\u001b[32mcrew\u001b[0m                                                                                                     \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[32m146be674-7521-4db6-bb70-287b22aa21aa\u001b[0m                                                                       \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mID: \u001b[0m\u001b[32m091cf919-5c4b-4168-ac49-65fe5e8faa9e\u001b[0m                                                                       \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m  \u001b[37mTool Args: \u001b[0m                                                                                                    \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37mFinal Output: Returns are accepted within 30 days of purchase. Therefore, you cannot return a product after \u001b[0m   \u001b[32m│\u001b[0m\n",
-       "\u001b[32m│\u001b[0m  \u001b[37m40 days.\u001b[0m                                                                                                       \u001b[32m│\u001b[0m\n",
+       "\u001b[32m│\u001b[0m  \u001b[37mFinal Output: Returns are accepted within 30 days of purchase.\u001b[0m                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m│\u001b[0m                                                                                                                 \u001b[32m│\u001b[0m\n",
        "\u001b[32m╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n"
@@ -1265,13 +1202,13 @@
      "output_type": "stream",
      "text": [
       "❓ Can I return a product after 40 days?\n",
-      "💡 Returns are accepted within 30 days of purchase. Therefore, you cannot return a product after 40 days.\n"
+      "💡 Returns are accepted within 30 days of purchase.\n"
      ]
     }
    ],
    "source": [
     "query = \"Can I return a product after 40 days?\"\n",
-    "response = crew.kickoff(inputs={\"query\": query,\"vector_db_id\": \"acme_docs\"})\n",
+    "response = crew.kickoff(inputs={\"query\": query,\"vector_store_id\": vector_store.id})\n",
     "print(\"❓\", query)\n",
     "print(\"💡\", response)"
    ]
@@ -1287,31 +1224,30 @@
     "\n",
     "We have successfully built a RAG system that combines:\n",
     "\n",
-    "-   **LlamaStack** for infrastructure (LLM serving + vector database)\n",
+    "-   **LlamaStack** for infrastructure (LLM serving + vector store)\n",
     "-   **CrewAI** for orchestration (agents, tasks, and tools)\n",
     "-   **Together AI** for high-quality language models\n",
     "\n",
     "### Key Benefits\n",
     "\n",
-    "1.  **Unified Infrastructure**: A single server for LLMs and vector databases simplifies deployment and management.\n",
+    "1.  **Unified Infrastructure**: A single server for LLMs and vector stores simplifies deployment and management.\n",
     "2.  **OpenAI Compatibility**: Enables easy integration with existing libraries and frameworks that support the OpenAI API standard, such as CrewAI.\n",
     "3.  **Multi-Provider Support**: Offers the flexibility to switch between different LLM and embedding providers without altering the core application logic.\n",
     "4.  **Production Ready**: LlamaStack includes features designed for production environments, such as built-in safety shields and monitoring capabilities.\n",
     "\n",
-    "### Next Steps\n",
-    "\n",
-    "-   Implement more sophisticated document processing and chunking techniques.\n",
-    "-   Add conversation memory within the CrewAI pipeline for multi-turn interactions.\n",
-    "-   Integrate advanced safety filtering and monitoring using LlamaStack's features.\n",
-    "-   Scale the system to handle larger document collections and higher query volumes.\n",
-    "-   Integrate the RAG system with web frameworks like FastAPI or Streamlit to build interactive applications.\n",
-    "\n",
-    "---\n",
     "\n",
     "##### 🔧 Cleanup\n",
     "\n",
     "Remember to stop the LlamaStack server process when you are finished to free up resources. You can use the `kill_llama_stack_server()` helper function defined earlier in the notebook."
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a21270b4-b0a7-4481-96a5-044f908de363",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/docs/notebooks/crewai/customer_policies.txt b/docs/notebooks/crewai/customer_policies.txt
new file mode 100644
index 000000000..9b624e847
--- /dev/null
+++ b/docs/notebooks/crewai/customer_policies.txt
@@ -0,0 +1 @@
+Acme ships globally in 3-5 business days.
diff --git a/docs/notebooks/crewai/llama_stack_server.log b/docs/notebooks/crewai/llama_stack_server.log
new file mode 100644
index 000000000..a4545f0d9
--- /dev/null
+++ b/docs/notebooks/crewai/llama_stack_server.log
@@ -0,0 +1,1936 @@
+Uninstalled 3 packages in 114ms
+Installed 3 packages in 165ms
+INFO     2025-09-10 14:47:19,351 llama_stack.core.utils.config_resolution:52 core: Using distribution:
+         /Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml
+INFO     2025-09-10 14:47:19,353 llama_stack.cli.stack.run:129 cli: Using run configuration:
+         /Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml
+Using virtual environment: /Users/kaiwu/work/kaiwu/llama-stack/.venv
+Virtual environment already activated
++ '[' -n /Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml ']'
++ yaml_config_arg=/Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml
++ python -m llama_stack.core.server.server /Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml --port 8321
+INFO     2025-09-10 14:47:23,918 llama_stack.core.utils.config_resolution:45 core: Using file path:
+         /Users/kaiwu/work/kaiwu/llama-stack/llama_stack/distributions/starter/run.yaml
+INFO     2025-09-10 14:47:23,929 __main__:615 core::server: Run configuration:
+INFO     2025-09-10 14:47:23,933 __main__:618 core::server: apis:
+         - agents
+         - batches
+         - datasetio
+         - eval
+         - files
+         - inference
+         - post_training
+         - safety
+         - scoring
+         - telemetry
+         - tool_runtime
+         - vector_io
+         benchmarks: []
+         datasets: []
+         image_name: starter
+         inference_store:
+           db_path: /Users/kaiwu/.llama/distributions/starter/inference_store.db
+           type: sqlite
+         metadata_store:
+           db_path: /Users/kaiwu/.llama/distributions/starter/registry.db
+           type: sqlite
+         models: []
+         providers:
+           agents:
+           - config:
+               persistence_store:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/agents_store.db
+                 type: sqlite
+               responses_store:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/responses_store.db
+                 type: sqlite
+             provider_id: meta-reference
+             provider_type: inline::meta-reference
+           batches:
+           - config:
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/batches.db
+                 type: sqlite
+             provider_id: reference
+             provider_type: inline::reference
+           datasetio:
+           - config:
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/huggingface_datasetio.db
+                 type: sqlite
+             provider_id: huggingface
+             provider_type: remote::huggingface
+           - config:
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/localfs_datasetio.db
+                 type: sqlite
+             provider_id: localfs
+             provider_type: inline::localfs
+           eval:
+           - config:
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/meta_reference_eval.db
+                 type: sqlite
+             provider_id: meta-reference
+             provider_type: inline::meta-reference
+           files:
+           - config:
+               metadata_store:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/files_metadata.db
+                 type: sqlite
+               storage_dir: /Users/kaiwu/.llama/distributions/starter/files
+             provider_id: meta-reference-files
+             provider_type: inline::localfs
+           inference:
+           - config:
+               api_key: '********'
+               url: https://api.fireworks.ai/inference/v1
+             provider_id: fireworks
+             provider_type: remote::fireworks
+           - config:
+               api_key: '********'
+               url: https://api.together.xyz/v1
+             provider_id: together
+             provider_type: remote::together
+           - config: {}
+             provider_id: bedrock
+             provider_type: remote::bedrock
+           - config:
+               api_key: '********'
+               base_url: https://api.openai.com/v1
+             provider_id: openai
+             provider_type: remote::openai
+           - config:
+               api_key: '********'
+             provider_id: anthropic
+             provider_type: remote::anthropic
+           - config:
+               api_key: '********'
+             provider_id: gemini
+             provider_type: remote::gemini
+           - config:
+               api_key: '********'
+               url: https://api.groq.com
+             provider_id: groq
+             provider_type: remote::groq
+           - config:
+               api_key: '********'
+               url: https://api.sambanova.ai/v1
+             provider_id: sambanova
+             provider_type: remote::sambanova
+           - config: {}
+             provider_id: sentence-transformers
+             provider_type: inline::sentence-transformers
+           post_training:
+           - config:
+               checkpoint_format: meta
+             provider_id: torchtune-cpu
+             provider_type: inline::torchtune-cpu
+           safety:
+           - config:
+               excluded_categories: []
+             provider_id: llama-guard
+             provider_type: inline::llama-guard
+           - config: {}
+             provider_id: code-scanner
+             provider_type: inline::code-scanner
+           scoring:
+           - config: {}
+             provider_id: basic
+             provider_type: inline::basic
+           - config: {}
+             provider_id: llm-as-judge
+             provider_type: inline::llm-as-judge
+           - config:
+               openai_api_key: '********'
+             provider_id: braintrust
+             provider_type: inline::braintrust
+           telemetry:
+           - config:
+               service_name: "\u200B"
+               sinks: console,sqlite
+               sqlite_db_path: /Users/kaiwu/.llama/distributions/starter/trace_store.db
+             provider_id: meta-reference
+             provider_type: inline::meta-reference
+           tool_runtime:
+           - config:
+               api_key: '********'
+               max_results: 3
+             provider_id: brave-search
+             provider_type: remote::brave-search
+           - config:
+               api_key: '********'
+               max_results: 3
+             provider_id: tavily-search
+             provider_type: remote::tavily-search
+           - config: {}
+             provider_id: rag-runtime
+             provider_type: inline::rag-runtime
+           - config: {}
+             provider_id: model-context-protocol
+             provider_type: remote::model-context-protocol
+           vector_io:
+           - config:
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/faiss_store.db
+                 type: sqlite
+             provider_id: faiss
+             provider_type: inline::faiss
+           - config:
+               db_path: /Users/kaiwu/.llama/distributions/starter/sqlite_vec.db
+               kvstore:
+                 db_path: /Users/kaiwu/.llama/distributions/starter/sqlite_vec_registry.db
+                 type: sqlite
+             provider_id: sqlite-vec
+             provider_type: inline::sqlite-vec
+         scoring_fns: []
+         server:
+           port: 8321
+         shields: []
+         tool_groups:
+         - provider_id: tavily-search
+           toolgroup_id: builtin::websearch
+         - provider_id: rag-runtime
+           toolgroup_id: builtin::rag
+         vector_dbs: []
+         version: 2
+
+INFO     2025-09-10 14:47:34,785 llama_stack.core.stack:388 core: starting registry refresh task
+INFO     2025-09-10 14:47:34,946 __main__:580 core::server: Listening on ['::', '0.0.0.0']:8321
+INFO     2025-09-10 14:47:35,196 uvicorn.error:84 uncategorized: Started server process [26795]
+INFO     2025-09-10 14:47:35,197 uvicorn.error:48 uncategorized: Waiting for application startup.
+INFO     2025-09-10 14:47:35,198 __main__:168 core::server: Starting up
+INFO     2025-09-10 14:47:35,199 uvicorn.error:62 uncategorized: Application startup complete.
+INFO     2025-09-10 14:47:35,199 uvicorn.error:216 uncategorized: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
+INFO     2025-09-10 14:47:36,852 uvicorn.access:473 uncategorized: 127.0.0.1:55733 - "GET /v1/health HTTP/1.1" 200
+INFO     2025-09-10 14:47:36,854 console_span_processor:28 telemetry: 21:47:36.854 [START] /v1/health
+INFO     2025-09-10 14:47:36,862 console_span_processor:39 telemetry: 21:47:36.855 [END] /v1/health [StatusCode.OK] (1.07ms)
+INFO     2025-09-10 14:47:36,865 console_span_processor:48 telemetry:     raw_path: /v1/health
+INFO     2025-09-10 14:47:36,866 console_span_processor:62 telemetry:  21:47:36.853 [INFO] 127.0.0.1:55733 - "GET /v1/health HTTP/1.1" 200
+INFO     2025-09-10 14:48:28,324 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "GET /v1/models HTTP/1.1" 200
+INFO     2025-09-10 14:48:28,333 console_span_processor:28 telemetry: 21:48:28.332 [START] /v1/models
+INFO     2025-09-10 14:48:28,392 console_span_processor:39 telemetry: 21:48:28.334 [END] ModelsRoutingTable.list_models [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:48:28,410 console_span_processor:48 telemetry:     output: {'data': [{'identifier': 'llama_api/Llama-3.3-70B-Instruct',
+         'provider_resource_id': 'Llama-3.3-70B-Instruct', 'provider_id': 'llama_api', 'type': 'model', 'metadata': {}, 'model_type': 'llm'},
+         {'identifier': 'llama_api/Llama-4-Maverick-17B-128E-Instruct-FP8', 'provider_resource_id': 'Llama-4-Maverick-17B-128E-Instruct-FP8',
+         'provider_id': 'llama_api', 'type': 'model', 'metadata': {}, 'model_type': 'llm'}, {'identifier':
+         'llama_api/Llama-4-Scout-17B-16E-Instruct-FP8', 'provider_resource_id': 'Llama-4-Scout-17B-16E-Instruct-FP8', 'provider_id': 'llama_api',
+         'type': 'model', 'metadata': {}, 'model_type': 'llm'}, {'identifier': 'meta-llama/Llama-3.3-70B-Instruct', 'provider_resource_id':
+         'Llama-3.3-70B-Instruct', 'provider_id': 'meta-llama', 'type': 'model', 'metadata': {}, 'model_type': 'llm'}, {'identifier':
+         'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8', 'provider_resource_id': 'Llama-4-Maverick-17B-128E-Instruct-FP8', 'provider_id':
+         'meta-llama', 'type': 'model', 'metadata': {...
+INFO     2025-09-10 14:48:28,426 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "GET /v1/shields HTTP/1.1" 200
+INFO     2025-09-10 14:48:28,465 console_span_processor:39 telemetry: 21:48:28.430 [END] /v1/models [StatusCode.OK] (97.81ms)
+INFO     2025-09-10 14:48:28,466 console_span_processor:48 telemetry:     raw_path: /v1/models
+INFO     2025-09-10 14:48:28,466 console_span_processor:62 telemetry:  21:48:28.330 [INFO] 127.0.0.1:56006 - "GET /v1/models HTTP/1.1" 200
+INFO     2025-09-10 14:48:28,468 console_span_processor:28 telemetry: 21:48:28.468 [START] /v1/shields
+INFO     2025-09-10 14:48:28,523 console_span_processor:39 telemetry: 21:48:28.469 [END] ShieldsRoutingTable.list_shields [StatusCode.OK] (0.24ms)
+INFO     2025-09-10 14:48:28,524 console_span_processor:48 telemetry:     output: {'data': []}
+INFO     2025-09-10 14:48:28,535 console_span_processor:39 telemetry: 21:48:28.527 [END] /v1/shields [StatusCode.OK] (59.54ms)
+INFO     2025-09-10 14:48:28,536 console_span_processor:48 telemetry:     raw_path: /v1/shields
+INFO     2025-09-10 14:48:28,536 console_span_processor:62 telemetry:  21:48:28.427 [INFO] 127.0.0.1:56006 - "GET /v1/shields HTTP/1.1" 200
+INFO     2025-09-10 14:48:32,432 console_span_processor:28 telemetry: 21:48:32.432 [START] /v1/openai/v1/files
+WARNING  2025-09-10 14:48:32,437 __main__:220 core::server: Could not read or log request body for POST /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,495 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+WARNING  2025-09-10 14:48:32,502 __main__:220 core::server: Could not read or log request body for POST /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,514 console_span_processor:39 telemetry: 21:48:32.496 [END] LocalfsFilesImpl.openai_upload_file [StatusCode.OK] (55.21ms)
+INFO     2025-09-10 14:48:32,515 console_span_processor:48 telemetry:     output: {'object': 'file', 'id': 'file-489db9aae0424745960e3408ff0f477f',
+         'bytes': 41, 'created_at': 1757540912, 'expires_at': 1789076912, 'filename': 'shipping_policy.txt', 'purpose': 'assistants'}
+INFO     2025-09-10 14:48:32,523 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+INFO     2025-09-10 14:48:32,526 console_span_processor:39 telemetry: 21:48:32.516 [END] /v1/openai/v1/files [StatusCode.OK] (84.71ms)
+INFO     2025-09-10 14:48:32,528 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/files
+INFO     2025-09-10 14:48:32,529 console_span_processor:62 telemetry:  21:48:32.440 [WARN] Could not read or log request body for POST
+         /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,529 console_span_processor:62 telemetry:  21:48:32.496 [INFO] 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+WARNING  2025-09-10 14:48:32,531 __main__:220 core::server: Could not read or log request body for POST /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,532 console_span_processor:28 telemetry: 21:48:32.532 [START] /v1/openai/v1/files
+INFO     2025-09-10 14:48:32,540 console_span_processor:39 telemetry: 21:48:32.533 [END] LocalfsFilesImpl.openai_upload_file [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:48:32,545 console_span_processor:48 telemetry:     output: {'object': 'file', 'id': 'file-b2f38b0e164347f5a2b6bbe211e33ff3',
+         'bytes': 48, 'created_at': 1757540912, 'expires_at': 1789076912, 'filename': 'returns_policy.txt', 'purpose': 'assistants'}
+INFO     2025-09-10 14:48:32,546 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+INFO     2025-09-10 14:48:32,557 console_span_processor:39 telemetry: 21:48:32.547 [END] /v1/openai/v1/files [StatusCode.OK] (15.42ms)
+INFO     2025-09-10 14:48:32,560 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/files
+INFO     2025-09-10 14:48:32,562 console_span_processor:62 telemetry:  21:48:32.502 [WARN] Could not read or log request body for POST
+         /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,562 console_span_processor:62 telemetry:  21:48:32.526 [INFO] 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+INFO     2025-09-10 14:48:32,563 console_span_processor:28 telemetry: 21:48:32.563 [START] /v1/openai/v1/files
+INFO     2025-09-10 14:48:32,581 console_span_processor:39 telemetry: 21:48:32.565 [END] LocalfsFilesImpl.openai_upload_file [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:48:32,582 console_span_processor:48 telemetry:     output: {'object': 'file', 'id': 'file-6f6f157d165a4078b4abef66a095ccd6',
+         'bytes': 45, 'created_at': 1757540912, 'expires_at': 1789076912, 'filename': 'support.txt', 'purpose': 'assistants'}
+INFO     2025-09-10 14:48:32,592 console_span_processor:39 telemetry: 21:48:32.584 [END] /v1/openai/v1/files [StatusCode.OK] (20.86ms)
+INFO     2025-09-10 14:48:32,592 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/files
+INFO     2025-09-10 14:48:32,593 console_span_processor:62 telemetry:  21:48:32.531 [WARN] Could not read or log request body for POST
+         /v1/openai/v1/files: Stream consumed
+INFO     2025-09-10 14:48:32,594 console_span_processor:62 telemetry:  21:48:32.546 [INFO] 127.0.0.1:56006 - "POST /v1/openai/v1/files HTTP/1.1" 200
+INFO     2025-09-10 14:48:32,595 console_span_processor:28 telemetry: 21:48:32.595 [START] /v1/openai/v1/vector_stores
+INFO     2025-09-10 14:48:32,601 console_span_processor:39 telemetry: 21:48:32.596 [END] FaissVectorIOAdapter.register_vector_db [StatusCode.OK]
+         (0.15ms)
+INFO     2025-09-10 14:48:32,602 console_span_processor:48 telemetry:     output:
+INFO     2025-09-10 14:48:32,621 console_span_processor:39 telemetry: 21:48:32.604 [END] VectorDBsRoutingTable.register_vector_db [StatusCode.OK]
+         (8.25ms)
+INFO     2025-09-10 14:48:32,622 console_span_processor:48 telemetry:     output: {'identifier': 'vs_dab05212-db05-402c-91ef-57e41797406b',
+         'provider_resource_id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'provider_id': 'faiss', 'type': 'vector_db', 'owner': None, 'source':
+         'via_register_api', 'embedding_model': 'sentence-transformers/all-MiniLM-L6-v2', 'embedding_dimension': 384, 'vector_db_name': 'acme_docs'}
+INFO     2025-09-10 14:48:32,631 console_span_processor:39 telemetry: 21:48:32.629 [END] FaissVectorIOAdapter.register_vector_db [StatusCode.OK]
+         (5.84ms)
+INFO     2025-09-10 14:48:32,632 console_span_processor:48 telemetry:     output:
+INFO     2025-09-10 14:48:32,650 console_span_processor:39 telemetry: 21:48:32.640 [END] LocalfsFilesImpl.openai_retrieve_file [StatusCode.OK]
+         (2.11ms)
+INFO     2025-09-10 14:48:32,655 llama_stack.providers.utils.inference.embedding_mixin:103 providers::utils: Loading sentence transformer for
+         all-MiniLM-L6-v2...
+INFO     2025-09-10 14:48:32,660 console_span_processor:39 telemetry: 21:48:32.651 [END] LocalfsFilesImpl.openai_retrieve_file_content [StatusCode.OK]
+         (0.22ms)
+INFO     2025-09-10 14:48:32,662 console_span_processor:39 telemetry: 21:48:32.661 [END] LocalfsFilesImpl.openai_retrieve_file_content [StatusCode.OK]
+         (0.23ms)
+INFO     2025-09-10 14:48:32,670 console_span_processor:39 telemetry: 21:48:32.664 [END] LocalfsFilesImpl.openai_retrieve_file_content [StatusCode.OK]
+         (0.20ms)
+INFO     2025-09-10 14:48:32,674 console_span_processor:39 telemetry: 21:48:32.672 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.77ms)
+INFO     2025-09-10 14:48:32,675 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:48:32,681 console_span_processor:39 telemetry: 21:48:32.677 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.36ms)
+INFO     2025-09-10 14:48:32,682 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:48:32,690 console_span_processor:39 telemetry: 21:48:32.683 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.23ms)
+INFO     2025-09-10 14:48:32,690 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+WARNING  2025-09-10 14:48:59,202 torchao.kernel.intmm:23 uncategorized: Warning: Detected no triton, on systems without Triton certain kernels will
+         not work
+W0910 14:48:59.847000 26795 torch/distributed/elastic/multiprocessing/redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.
+INFO     2025-09-10 14:49:05,098 sentence_transformers.SentenceTransformer:219 uncategorized: Use pytorch device_name: mps
+INFO     2025-09-10 14:49:05,099 sentence_transformers.SentenceTransformer:227 uncategorized: Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+INFO     2025-09-10 14:49:09,461 console_span_processor:39 telemetry: 21:49:09.453 [END] InferenceRouter.openai_embeddings [StatusCode.OK]
+         (36781.46ms)
+INFO     2025-09-10 14:49:09,462 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371,
+         -0.03415430337190628, -0.10541579872369766, 0.0700988844037056, -0.03148755803704262, 0.07130879163742065, -0.0025789395440369844, 0.00581...
+INFO     2025-09-10 14:49:09,463 console_span_processor:62 telemetry:  21:48:32.656 [INFO] Loading sentence transformer for all-MiniLM-L6-v2...
+INFO     2025-09-10 14:49:09,464 console_span_processor:62 telemetry:  21:48:59.203 [WARN] Warning: Detected no triton, on systems without Triton
+         certain kernels will not work
+INFO     2025-09-10 14:49:09,464 console_span_processor:62 telemetry:  21:49:05.099 [INFO] Use pytorch device_name: mps
+INFO     2025-09-10 14:49:09,465 console_span_processor:62 telemetry:  21:49:05.100 [INFO] Load pretrained SentenceTransformer: all-MiniLM-L6-v2
+INFO     2025-09-10 14:49:09,469 console_span_processor:39 telemetry: 21:49:09.468 [END] FaissVectorIOAdapter.insert_chunks [StatusCode.OK]
+         (36796.60ms)
+INFO     2025-09-10 14:49:09,484 console_span_processor:39 telemetry: 21:49:09.480 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:49:09,484 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:09,488 console_span_processor:39 telemetry: 21:49:09.486 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:49:09,488 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:49:09,492 console_span_processor:39 telemetry: 21:49:09.490 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.58ms)
+INFO     2025-09-10 14:49:09,493 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:09,924 console_span_processor:39 telemetry: 21:49:09.921 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (441.92ms)
+INFO     2025-09-10 14:49:09,926 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.050018310546875, 0.0007208543829619884, 0.048539839684963226, 0.013606906868517399, -0.006504438817501068, 0.004046895541250706,
+         -0.04984626919031143, -8.024782437132671e-05, -0.02159256860613823, 0.0015519573353230953, 0.10069309175014496, 0.04437215253710747,
+         -0.022335151210427284, -0.05230937898159027, -0.028983373194932938, -0.049531035125255585, 0.03074926696717739, -0.03056054376065731,
+         0.021489247679710388, 0.016868554055690765, -0.028984908014535904, -0.07053273916244507, -0.022961033508181572, -0.0717858299612999,
+         0.0674479678273201, 0.021412895992398262, -0.06054700165987015, 0.021910998970270157, -0.023192649707198143, -0.028992125764489174,
+         0.025179730728268623, -0.006338999141007662, -0.001874048844911158, 0.009305929765105247, 0.011422501876950264, 0.037604980170726776,
+         -0.039689771831035614, -0.08734020590782166, -2.2408454242395237e-05, -0.027818966656923294, -0.05882560834288597, 0.0047395736910402...
+INFO     2025-09-10 14:49:09,935 console_span_processor:39 telemetry: 21:49:09.928 [END] FaissVectorIOAdapter.insert_chunks [StatusCode.OK] (448.38ms)
+INFO     2025-09-10 14:49:09,938 console_span_processor:39 telemetry: 21:49:09.936 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:49:09,939 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:09,944 console_span_processor:39 telemetry: 21:49:09.942 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.34ms)
+INFO     2025-09-10 14:49:09,945 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:49:09,947 console_span_processor:39 telemetry: 21:49:09.946 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:49:09,948 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:09,959 console_span_processor:39 telemetry: 21:49:09.949 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (13.11ms)
+INFO     2025-09-10 14:49:09,960 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.07958973944187164, -0.09508923441171646, 0.0747891291975975, -0.0033136801794171333, 0.03454456478357315, 0.041140977293252945,
+         -0.06803964823484421, 0.014224942773580551, -0.06238540634512901, 0.03782016783952713, -0.030075553804636, 0.07446419447660446,
+         -0.06111196056008339, 0.029801342636346817, 0.11399323493242264, 0.01820552721619606, 0.00257650064304471, -0.08268020302057266,
+         -0.08060702681541443, 0.004656659439206123, -0.053371429443359375, -0.03258609026670456, -0.024163667112588882, 0.0012904119212180376,
+         0.022314254194498062, -0.04680437222123146, -0.03445201367139816, 0.004914817865937948, -0.03131281957030296, 0.010677368380129337,
+         -0.011123362928628922, -0.022707175463438034, -0.0027590240351855755, 0.014538224786520004, 0.00694124773144722, -0.0027126462664455175,
+         0.023252086713910103, 0.07095751911401749, -0.030273273587226868, -0.04118560254573822, -0.04669591784477234, -0.028823204338550568,
+         -0.010...
+INFO     2025-09-10 14:49:09,974 console_span_processor:39 telemetry: 21:49:09.961 [END] FaissVectorIOAdapter.insert_chunks [StatusCode.OK] (25.74ms)
+INFO     2025-09-10 14:49:09,980 console_span_processor:39 telemetry: 21:49:09.975 [END] LocalfsFilesImpl.openai_retrieve_file [StatusCode.OK]
+         (37339.17ms)
+INFO     2025-09-10 14:49:09,981 console_span_processor:48 telemetry:     output: {'object': 'file', 'id': 'file-b2f38b0e164347f5a2b6bbe211e33ff3',
+         'bytes': 48, 'created_at': 1757540912, 'expires_at': 1789076912, 'filename': 'returns_policy.txt', 'purpose': 'assistants'}
+INFO     2025-09-10 14:49:09,987 console_span_processor:39 telemetry: 21:49:09.982 [END] LocalfsFilesImpl.openai_retrieve_file [StatusCode.OK]
+         (37346.24ms)
+INFO     2025-09-10 14:49:09,988 console_span_processor:48 telemetry:     output: {'object': 'file', 'id': 'file-489db9aae0424745960e3408ff0f477f',
+         'bytes': 41, 'created_at': 1757540912, 'expires_at': 1789076912, 'filename': 'shipping_policy.txt', 'purpose': 'assistants'}
+INFO     2025-09-10 14:49:10,038 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "POST /v1/openai/v1/vector_stores HTTP/1.1" 200
+INFO     2025-09-10 14:49:10,041 console_span_processor:39 telemetry: 21:49:10.039 [END] VectorIORouter.openai_create_vector_store [StatusCode.OK]
+         (37443.86ms)
+INFO     2025-09-10 14:49:10,042 console_span_processor:48 telemetry:     output: {'id': 'vs_dab05212-db05-402c-91ef-57e41797406b', 'object':
+         'vector_store', 'created_at': 1757540912, 'name': 'acme_docs', 'usage_bytes': 0, 'file_counts': {'completed': 3, 'cancelled': 0, 'failed': 0,
+         'in_progress': 0, 'total': 3}, 'status': 'completed', 'expires_after': None, 'expires_at': None, 'last_active_at': 1757540912, 'metadata':
+         {'provider_id': 'faiss', 'provider_vector_db_id': 'vs_dab05212-db05-402c-91ef-57e41797406b'}}
+INFO     2025-09-10 14:49:10,047 console_span_processor:39 telemetry: 21:49:10.044 [END] /v1/openai/v1/vector_stores [StatusCode.OK] (37449.64ms)
+INFO     2025-09-10 14:49:10,047 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/vector_stores
+INFO     2025-09-10 14:49:10,048 console_span_processor:62 telemetry:  21:49:10.039 [INFO] 127.0.0.1:56006 - "POST /v1/openai/v1/vector_stores 
+         HTTP/1.1" 200
+INFO     2025-09-10 14:49:10,126 console_span_processor:28 telemetry: 21:49:10.126 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:49:10,140 console_span_processor:39 telemetry: 21:49:10.132 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.72ms)
+INFO     2025-09-10 14:49:10,142 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:10,155 console_span_processor:39 telemetry: 21:49:10.143 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.34ms)
+INFO     2025-09-10 14:49:10,155 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:49:10,160 console_span_processor:39 telemetry: 21:49:10.157 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:49:10,161 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:49:10,258 console_span_processor:39 telemetry: 21:49:10.219 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (88.76ms)
+INFO     2025-09-10 14:49:10,266 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:49:10,285 uvicorn.access:473 uncategorized: 127.0.0.1:56006 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:49:10,295 console_span_processor:39 telemetry: 21:49:10.292 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (161.80ms)
+INFO     2025-09-10 14:49:10,296 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:49:10,304 console_span_processor:39 telemetry: 21:49:10.299 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (169.86ms)
+INFO     2025-09-10 14:49:10,313 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}, {'file_id': '', 'filename': '', 'score': 0.7863530206684866, 'attributes': {'document_id':
+         'file-b2f38b0e164347f5a2b6bbe211e33ff3', 'token_count': 10.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Returns are
+         accepted within 30 days of purchase.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:49:10,323 console_span_processor:39 telemetry: 21:49:10.318 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (188.52ms)
+INFO     2025-09-10 14:49:10,324 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}, {'file_id': '', 'filename': '', 'score': 0.7863530206684866, 'attributes': {'document_id':
+         'file-b2f38b0e164347f5a2b6bbe211e33ff3', 'token_count': 10.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Returns are
+         accepted within 30 days of purchase.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:49:10,329 console_span_processor:39 telemetry: 21:49:10.326 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (199.98ms)
+INFO     2025-09-10 14:49:10,336 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:49:10,337 console_span_processor:62 telemetry:  21:49:10.290 [INFO] 127.0.0.1:56006 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:49:57,749 console_span_processor:28 telemetry: 21:49:57.749 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:49:57,758 console_span_processor:39 telemetry: 21:49:57.753 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.26ms)
+INFO     2025-09-10 14:49:57,758 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:49:57,761 console_span_processor:39 telemetry: 21:49:57.760 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:49:57,762 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:49:57,766 console_span_processor:39 telemetry: 21:49:57.764 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.51ms)
+INFO     2025-09-10 14:49:57,767 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+To disable this warning, you can either:
+	- Avoid using `tokenizers` before the fork if possible
+	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+INFO     2025-09-10 14:50:01,673 console_span_processor:39 telemetry: 21:50:01.672 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (3908.76ms)
+INFO     2025-09-10 14:50:01,675 console_span_processor:48 telemetry:     output: {'id': 'oAit2Fa-62bZhn-97d2236c1b482510', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': "In the Andes' gentle breeze, a llama's soft eyes gaze with 
+         peaceful ease, its fur a warm and fuzzy tease. With steps both gentle and serene, the llama roams, a symbol of calm, its beauty pure and 
+         supreme.", 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed':
+         7075179338234292000}], 'created': 1757540999, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 50, 'prompt_tokens': 50, 'total_tokens': 100,
+         'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 0}, 'prompt': []}
+INFO     2025-09-10 14:50:01,687 uvicorn.access:473 uncategorized: ::1:56392 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:50:01,692 console_span_processor:39 telemetry: 21:50:01.688 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (3936.71ms)
+INFO     2025-09-10 14:50:01,692 console_span_processor:48 telemetry:     output: {'id': 'oAit2Fa-62bZhn-97d2236c1b482510', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': "In the Andes' gentle breeze, a llama's soft eyes gaze with 
+         peaceful ease, its fur a warm and fuzzy tease. With steps both gentle and serene, the llama roams, a symbol of calm, its beauty pure and 
+         supreme.", 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed':
+         7075179338234292000}], 'created': 1757540999, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 50, 'prompt_tokens': 50, 'total_tokens': 100,
+         'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 0}, 'prompt': [], 'metrics': [{'trace_id':
+         'b7f7481d23ea1b2d4b89111888ebe8b3', 'span_id': '60cf33fa222feaf3', 'timestamp': '2025-09-10T21:50:01.687118Z', 'attributes': {'model_id':
+         'together/me...
+INFO     2025-09-10 14:50:01,694 console_span_processor:62 telemetry:  21:50:01.687 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:50:01,695 console_span_processor:67 telemetry: value: 50
+INFO     2025-09-10 14:50:01,695 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:50:01,696 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:50:01,696 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:50:01,698 console_span_processor:62 telemetry:  21:50:01.687 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:50:01,699 console_span_processor:67 telemetry: value: 50
+INFO     2025-09-10 14:50:01,699 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:50:01,700 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:50:01,701 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:50:01,719 console_span_processor:62 telemetry:  21:50:01.687 [INFO] metric.total_tokens
+INFO     2025-09-10 14:50:01,723 console_span_processor:67 telemetry: value: 100
+INFO     2025-09-10 14:50:01,725 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:50:01,730 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:50:01,733 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:50:01,746 console_span_processor:39 telemetry: 21:50:01.738 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (3989.18ms)
+INFO     2025-09-10 14:50:01,747 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:50:01,748 console_span_processor:62 telemetry:  21:50:01.688 [INFO] ::1:56392 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:51:45,329 console_span_processor:28 telemetry: 21:51:45.329 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:45,371 console_span_processor:39 telemetry: 21:51:45.337 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.30ms)
+INFO     2025-09-10 14:51:45,375 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:45,408 console_span_processor:39 telemetry: 21:51:45.377 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:51:45,408 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:51:45,415 console_span_processor:39 telemetry: 21:51:45.410 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.56ms)
+INFO     2025-09-10 14:51:45,415 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:46,425 console_span_processor:39 telemetry: 21:51:46.414 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (1005.10ms)
+INFO     2025-09-10 14:51:46,426 console_span_processor:48 telemetry:     output: {'id': 'oAitZwc-62bZhn-97d22606494cfa1a', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold":
+         0}', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed':
+         15009310705663400000}], 'created': 1757541105, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 138, 'prompt_tokens': 495, 'total_tokens': 633,...
+INFO     2025-09-10 14:51:46,443 uvicorn.access:473 uncategorized: ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:51:46,457 console_span_processor:39 telemetry: 21:51:46.444 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (1106.92ms)
+INFO     2025-09-10 14:51:46,458 console_span_processor:48 telemetry:     output: {'id': 'oAitZwc-62bZhn-97d22606494cfa1a', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold":
+         0}', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed':
+         15009310705663400000}], 'created': 1757541105, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 138, 'prompt_tokens': 495, 'total_tokens': 63...
+INFO     2025-09-10 14:51:46,459 console_span_processor:62 telemetry:  21:51:46.442 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:51:46,459 console_span_processor:67 telemetry: value: 495
+INFO     2025-09-10 14:51:46,460 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:46,460 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:46,461 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:46,461 console_span_processor:62 telemetry:  21:51:46.442 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:51:46,462 console_span_processor:67 telemetry: value: 138
+INFO     2025-09-10 14:51:46,463 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:46,463 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:46,463 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:46,464 console_span_processor:62 telemetry:  21:51:46.442 [INFO] metric.total_tokens
+INFO     2025-09-10 14:51:46,466 console_span_processor:67 telemetry: value: 633
+INFO     2025-09-10 14:51:46,466 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:46,467 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:46,467 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:46,522 console_span_processor:39 telemetry: 21:51:46.468 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (1138.88ms)
+INFO     2025-09-10 14:51:46,530 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:46,531 console_span_processor:62 telemetry:  21:51:46.443 [INFO] ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:51:46,532 console_span_processor:28 telemetry: 21:51:46.532 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:46,544 console_span_processor:39 telemetry: 21:51:46.534 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.20ms)
+INFO     2025-09-10 14:51:46,544 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:46,551 console_span_processor:39 telemetry: 21:51:46.547 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.27ms)
+INFO     2025-09-10 14:51:46,553 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:46,558 console_span_processor:39 telemetry: 21:51:46.556 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.51ms)
+INFO     2025-09-10 14:51:46,559 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,003 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,004 console_span_processor:39 telemetry: 21:51:46.991 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (457.59ms)
+INFO     2025-09-10 14:51:47,005 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,025 console_span_processor:39 telemetry: 21:51:47.008 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (474.21ms)
+INFO     2025-09-10 14:51:47,028 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,038 console_span_processor:39 telemetry: 21:51:47.034 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (500.67ms)
+INFO     2025-09-10 14:51:47,040 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,051 console_span_processor:39 telemetry: 21:51:47.043 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (510.32ms)
+INFO     2025-09-10 14:51:47,052 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,059 console_span_processor:39 telemetry: 21:51:47.055 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (522.95ms)
+INFO     2025-09-10 14:51:47,061 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,063 console_span_processor:62 telemetry:  21:51:47.004 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,066 console_span_processor:28 telemetry: 21:51:47.066 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:47,072 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,106 console_span_processor:39 telemetry: 21:51:47.086 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.59ms)
+INFO     2025-09-10 14:51:47,107 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,121 console_span_processor:39 telemetry: 21:51:47.118 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.85ms)
+INFO     2025-09-10 14:51:47,122 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:47,134 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,141 console_span_processor:39 telemetry: 21:51:47.124 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.22ms)
+INFO     2025-09-10 14:51:47,141 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,154 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,155 console_span_processor:39 telemetry: 21:51:47.143 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (57.86ms)
+INFO     2025-09-10 14:51:47,156 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,169 console_span_processor:39 telemetry: 21:51:47.159 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (75.34ms)
+INFO     2025-09-10 14:51:47,170 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,185 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,204 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,205 console_span_processor:39 telemetry: 21:51:47.173 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (89.84ms)
+INFO     2025-09-10 14:51:47,206 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,220 console_span_processor:39 telemetry: 21:51:47.208 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (128.68ms)
+INFO     2025-09-10 14:51:47,221 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,236 console_span_processor:39 telemetry: 21:51:47.223 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (157.17ms)
+INFO     2025-09-10 14:51:47,237 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,238 console_span_processor:62 telemetry:  21:51:47.074 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,239 console_span_processor:28 telemetry: 21:51:47.239 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:47,243 console_span_processor:39 telemetry: 21:51:47.241 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:47,244 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,271 console_span_processor:39 telemetry: 21:51:47.249 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (1.66ms)
+INFO     2025-09-10 14:51:47,272 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:47,275 console_span_processor:39 telemetry: 21:51:47.273 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.23ms)
+INFO     2025-09-10 14:51:47,275 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,281 console_span_processor:39 telemetry: 21:51:47.276 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (35.50ms)
+INFO     2025-09-10 14:51:47,281 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,294 console_span_processor:39 telemetry: 21:51:47.283 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (42.27ms)
+INFO     2025-09-10 14:51:47,295 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,298 console_span_processor:39 telemetry: 21:51:47.297 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (56.25ms)
+INFO     2025-09-10 14:51:47,299 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,301 console_span_processor:39 telemetry: 21:51:47.300 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (60.15ms)
+INFO     2025-09-10 14:51:47,302 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,306 console_span_processor:39 telemetry: 21:51:47.303 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (64.16ms)
+INFO     2025-09-10 14:51:47,307 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,307 console_span_processor:62 telemetry:  21:51:47.135 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,308 console_span_processor:28 telemetry: 21:51:47.308 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:47,316 console_span_processor:39 telemetry: 21:51:47.310 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.14ms)
+INFO     2025-09-10 14:51:47,317 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,320 console_span_processor:39 telemetry: 21:51:47.319 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.37ms)
+INFO     2025-09-10 14:51:47,321 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:47,324 console_span_processor:39 telemetry: 21:51:47.323 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.26ms)
+INFO     2025-09-10 14:51:47,325 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,327 console_span_processor:39 telemetry: 21:51:47.326 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (16.35ms)
+INFO     2025-09-10 14:51:47,327 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,330 console_span_processor:39 telemetry: 21:51:47.329 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (20.00ms)
+INFO     2025-09-10 14:51:47,331 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,334 console_span_processor:39 telemetry: 21:51:47.333 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (23.86ms)
+INFO     2025-09-10 14:51:47,335 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,338 console_span_processor:39 telemetry: 21:51:47.336 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (27.52ms)
+INFO     2025-09-10 14:51:47,339 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,341 console_span_processor:39 telemetry: 21:51:47.340 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (32.36ms)
+INFO     2025-09-10 14:51:47,342 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,343 console_span_processor:62 telemetry:  21:51:47.155 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,343 console_span_processor:28 telemetry: 21:51:47.343 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:47,348 console_span_processor:39 telemetry: 21:51:47.345 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.20ms)
+INFO     2025-09-10 14:51:47,348 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,352 console_span_processor:39 telemetry: 21:51:47.350 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.46ms)
+INFO     2025-09-10 14:51:47,353 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:47,355 console_span_processor:39 telemetry: 21:51:47.354 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:47,356 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,358 console_span_processor:39 telemetry: 21:51:47.356 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (11.96ms)
+INFO     2025-09-10 14:51:47,358 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,363 console_span_processor:39 telemetry: 21:51:47.360 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (15.81ms)
+INFO     2025-09-10 14:51:47,363 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,367 console_span_processor:39 telemetry: 21:51:47.365 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (20.64ms)
+INFO     2025-09-10 14:51:47,368 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,371 console_span_processor:39 telemetry: 21:51:47.369 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (25.18ms)
+INFO     2025-09-10 14:51:47,371 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,376 console_span_processor:39 telemetry: 21:51:47.373 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (29.29ms)
+INFO     2025-09-10 14:51:47,376 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,377 console_span_processor:62 telemetry:  21:51:47.185 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,378 console_span_processor:28 telemetry: 21:51:47.378 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:47,389 console_span_processor:39 telemetry: 21:51:47.379 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.14ms)
+INFO     2025-09-10 14:51:47,390 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,393 console_span_processor:39 telemetry: 21:51:47.391 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:47,394 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:47,403 console_span_processor:39 telemetry: 21:51:47.395 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:47,404 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:47,407 console_span_processor:39 telemetry: 21:51:47.405 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (26.41ms)
+INFO     2025-09-10 14:51:47,408 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:47,411 console_span_processor:39 telemetry: 21:51:47.409 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (30.37ms)
+INFO     2025-09-10 14:51:47,412 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:47,421 console_span_processor:39 telemetry: 21:51:47.413 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (34.69ms)
+INFO     2025-09-10 14:51:47,422 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,427 console_span_processor:39 telemetry: 21:51:47.423 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (44.73ms)
+INFO     2025-09-10 14:51:47,427 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:47,431 console_span_processor:39 telemetry: 21:51:47.429 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (50.76ms)
+INFO     2025-09-10 14:51:47,432 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:47,432 console_span_processor:62 telemetry:  21:51:47.205 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:47,433 console_span_processor:28 telemetry: 21:51:47.433 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:47,443 console_span_processor:39 telemetry: 21:51:47.435 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:47,444 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:47,454 console_span_processor:39 telemetry: 21:51:47.445 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:47,455 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:51:47,458 console_span_processor:39 telemetry: 21:51:47.456 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:51:47,459 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:48,137 console_span_processor:39 telemetry: 21:51:48.128 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (672.06ms)
+INFO     2025-09-10 14:51:48,138 console_span_processor:48 telemetry:     output: {'id': 'oAitaWe-62bZhn-97d22611ebfb168e', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1}', 'refusal': None,
+         'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed': 12110905932316877000}], 'created':
+         1757541107, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint':
+         None, 'usage': {'completion_tokens': 131, 'prompt_tokens': 998, 'total_tokens': 1129, 'completion_tokens_d...
+INFO     2025-09-10 14:51:48,140 uvicorn.access:473 uncategorized: ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,152 console_span_processor:39 telemetry: 21:51:48.141 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (706.73ms)
+INFO     2025-09-10 14:51:48,153 console_span_processor:48 telemetry:     output: {'id': 'oAitaWe-62bZhn-97d22611ebfb168e', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1}', 'refusal': None,
+         'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed': 12110905932316877000}],
+         'created': 1757541107, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None,
+         'system_fingerprint': None, 'usage': {'completion_tokens': 131, 'prompt_tokens': 998, 'total_tokens': 1129, 'completion_tokens...
+INFO     2025-09-10 14:51:48,154 console_span_processor:62 telemetry:  21:51:48.140 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:51:48,155 console_span_processor:67 telemetry: value: 998
+INFO     2025-09-10 14:51:48,155 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:48,156 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:48,156 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:48,157 console_span_processor:62 telemetry:  21:51:48.140 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:51:48,157 console_span_processor:67 telemetry: value: 131
+INFO     2025-09-10 14:51:48,158 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:48,158 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:48,159 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:48,159 console_span_processor:62 telemetry:  21:51:48.140 [INFO] metric.total_tokens
+INFO     2025-09-10 14:51:48,160 console_span_processor:67 telemetry: value: 1129
+INFO     2025-09-10 14:51:48,160 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:48,160 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:48,161 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:48,170 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,182 console_span_processor:39 telemetry: 21:51:48.162 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (728.49ms)
+INFO     2025-09-10 14:51:48,183 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:48,183 console_span_processor:62 telemetry:  21:51:48.141 [INFO] ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:51:48,184 console_span_processor:28 telemetry: 21:51:48.184 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,189 console_span_processor:39 telemetry: 21:51:48.187 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.65ms)
+INFO     2025-09-10 14:51:48,190 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,198 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,199 console_span_processor:39 telemetry: 21:51:48.191 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.29ms)
+INFO     2025-09-10 14:51:48,201 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,211 console_span_processor:39 telemetry: 21:51:48.204 [END] ModelsRoutingTable.get_model [StatusCode.OK] (1.00ms)
+INFO     2025-09-10 14:51:48,213 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,217 console_span_processor:39 telemetry: 21:51:48.215 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (28.70ms)
+INFO     2025-09-10 14:51:48,218 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,226 console_span_processor:39 telemetry: 21:51:48.221 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (35.89ms)
+INFO     2025-09-10 14:51:48,227 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,235 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,239 console_span_processor:39 telemetry: 21:51:48.229 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (44.29ms)
+INFO     2025-09-10 14:51:48,241 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,252 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,252 console_span_processor:39 telemetry: 21:51:48.242 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (57.30ms)
+INFO     2025-09-10 14:51:48,253 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,265 console_span_processor:39 telemetry: 21:51:48.255 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (70.68ms)
+INFO     2025-09-10 14:51:48,266 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,267 console_span_processor:62 telemetry:  21:51:48.171 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,268 console_span_processor:28 telemetry: 21:51:48.268 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,271 console_span_processor:39 telemetry: 21:51:48.270 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.24ms)
+INFO     2025-09-10 14:51:48,272 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,275 console_span_processor:39 telemetry: 21:51:48.273 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.24ms)
+INFO     2025-09-10 14:51:48,276 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,280 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,283 console_span_processor:39 telemetry: 21:51:48.277 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:48,284 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,294 console_span_processor:39 telemetry: 21:51:48.287 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (18.00ms)
+INFO     2025-09-10 14:51:48,294 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,298 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,307 console_span_processor:39 telemetry: 21:51:48.296 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (27.38ms)
+INFO     2025-09-10 14:51:48,308 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,330 console_span_processor:39 telemetry: 21:51:48.309 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (40.46ms)
+INFO     2025-09-10 14:51:48,331 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,334 console_span_processor:39 telemetry: 21:51:48.332 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (63.81ms)
+INFO     2025-09-10 14:51:48,334 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,359 console_span_processor:39 telemetry: 21:51:48.336 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (67.91ms)
+INFO     2025-09-10 14:51:48,360 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,360 console_span_processor:62 telemetry:  21:51:48.199 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,361 console_span_processor:28 telemetry: 21:51:48.361 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,364 console_span_processor:39 telemetry: 21:51:48.363 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:51:48,365 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,369 console_span_processor:39 telemetry: 21:51:48.367 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:48,369 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,373 console_span_processor:39 telemetry: 21:51:48.371 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.32ms)
+INFO     2025-09-10 14:51:48,374 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,377 console_span_processor:39 telemetry: 21:51:48.375 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (12.40ms)
+INFO     2025-09-10 14:51:48,378 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,383 console_span_processor:39 telemetry: 21:51:48.379 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (16.82ms)
+INFO     2025-09-10 14:51:48,383 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,395 console_span_processor:39 telemetry: 21:51:48.385 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (22.60ms)
+INFO     2025-09-10 14:51:48,396 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,409 console_span_processor:39 telemetry: 21:51:48.397 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (35.03ms)
+INFO     2025-09-10 14:51:48,414 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,429 console_span_processor:39 telemetry: 21:51:48.418 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (57.22ms)
+INFO     2025-09-10 14:51:48,432 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,433 console_span_processor:62 telemetry:  21:51:48.235 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,435 console_span_processor:28 telemetry: 21:51:48.435 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,449 console_span_processor:39 telemetry: 21:51:48.438 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:48,449 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,464 console_span_processor:39 telemetry: 21:51:48.451 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.28ms)
+INFO     2025-09-10 14:51:48,465 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,472 console_span_processor:39 telemetry: 21:51:48.467 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:51:48,473 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,477 console_span_processor:39 telemetry: 21:51:48.475 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (37.21ms)
+INFO     2025-09-10 14:51:48,478 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,482 console_span_processor:39 telemetry: 21:51:48.480 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (42.48ms)
+INFO     2025-09-10 14:51:48,483 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,486 console_span_processor:39 telemetry: 21:51:48.485 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (47.47ms)
+INFO     2025-09-10 14:51:48,487 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,492 console_span_processor:39 telemetry: 21:51:48.488 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (51.57ms)
+INFO     2025-09-10 14:51:48,492 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,501 console_span_processor:39 telemetry: 21:51:48.494 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (58.89ms)
+INFO     2025-09-10 14:51:48,502 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,503 console_span_processor:62 telemetry:  21:51:48.252 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,504 console_span_processor:28 telemetry: 21:51:48.504 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,511 console_span_processor:39 telemetry: 21:51:48.505 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:51:48,511 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,518 console_span_processor:39 telemetry: 21:51:48.512 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:48,519 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,546 console_span_processor:39 telemetry: 21:51:48.521 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.38ms)
+INFO     2025-09-10 14:51:48,547 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,557 console_span_processor:39 telemetry: 21:51:48.550 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (44.66ms)
+INFO     2025-09-10 14:51:48,558 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,563 console_span_processor:39 telemetry: 21:51:48.559 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (54.51ms)
+INFO     2025-09-10 14:51:48,564 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,570 console_span_processor:39 telemetry: 21:51:48.565 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (60.37ms)
+INFO     2025-09-10 14:51:48,571 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,573 console_span_processor:39 telemetry: 21:51:48.572 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (67.76ms)
+INFO     2025-09-10 14:51:48,574 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,577 console_span_processor:39 telemetry: 21:51:48.575 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (71.71ms)
+INFO     2025-09-10 14:51:48,577 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,578 console_span_processor:62 telemetry:  21:51:48.280 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,579 console_span_processor:28 telemetry: 21:51:48.579 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:48,587 console_span_processor:39 telemetry: 21:51:48.582 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.48ms)
+INFO     2025-09-10 14:51:48,588 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,592 console_span_processor:39 telemetry: 21:51:48.589 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:51:48,592 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:48,601 console_span_processor:39 telemetry: 21:51:48.594 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:48,602 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:48,605 console_span_processor:39 telemetry: 21:51:48.603 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (21.97ms)
+INFO     2025-09-10 14:51:48,606 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:51:48,609 console_span_processor:39 telemetry: 21:51:48.607 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (26.56ms)
+INFO     2025-09-10 14:51:48,609 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:48,614 console_span_processor:39 telemetry: 21:51:48.612 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (31.13ms)
+INFO     2025-09-10 14:51:48,615 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,626 console_span_processor:39 telemetry: 21:51:48.616 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (35.85ms)
+INFO     2025-09-10 14:51:48,627 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:48,635 console_span_processor:39 telemetry: 21:51:48.632 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (52.29ms)
+INFO     2025-09-10 14:51:48,636 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:48,636 console_span_processor:62 telemetry:  21:51:48.299 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:48,637 console_span_processor:28 telemetry: 21:51:48.637 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:48,640 console_span_processor:39 telemetry: 21:51:48.638 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:51:48,641 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:48,643 console_span_processor:39 telemetry: 21:51:48.642 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:48,643 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:51:48,653 console_span_processor:39 telemetry: 21:51:48.645 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.14ms)
+INFO     2025-09-10 14:51:48,653 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:53,149 console_span_processor:39 telemetry: 21:51:53.146 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (4501.83ms)
+INFO     2025-09-10 14:51:53,150 console_span_processor:48 telemetry:     output: {'id': 'oAitaoX-4YNCb4-97d22618bd651584', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I should try to use the Llama Stack Vector Store 
+         RAG tool again with a different input to see if I can get a relevant result.\n\nAction: Llama Stack Vector Store RAG tool\nAction Input: 
+         {"query": "shipping time", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1}', 'refusal': None, 'role': 'assistant',
+         'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed': 7090030016801882000}], 'created': 1757541108, 'model':
+         'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage':
+         {'completion_tokens': 89, 'prompt_tokens': 1494, 'total_tokens': 1583, 'completion_tokens_details': None, 'prompt_tokens_details': None,
+         'cached_tokens': 0}, 'prompt': []}
+INFO     2025-09-10 14:51:53,153 uvicorn.access:473 uncategorized: ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:51:53,157 console_span_processor:39 telemetry: 21:51:53.153 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (4515.32ms)
+INFO     2025-09-10 14:51:53,158 console_span_processor:48 telemetry:     output: {'id': 'oAitaoX-4YNCb4-97d22618bd651584', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I should try to use the Llama Stack Vector Store 
+         RAG tool again with a different input to see if I can get a relevant result.\n\nAction: Llama Stack Vector Store RAG tool\nAction Input: 
+         {"query": "shipping time", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1}', 'refusal': None, 'role': 'assistant',
+         'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed': 7090030016801882000}], 'created': 1757541108,
+         'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage':
+         {'completion_tokens': 89, 'prompt_tokens': 1494, 'total_tokens': 1583, 'completion_tokens_details': None, 'prompt_tokens_details': None,
+         'cached_tokens': 0}, 'prompt': [], 'metrics': [{'trace_id': 'dd10581530aada6b43c22d2c221fafe0', 'span_id': 'ba2f798e...
+INFO     2025-09-10 14:51:53,159 console_span_processor:62 telemetry:  21:51:53.152 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:51:53,160 console_span_processor:67 telemetry: value: 1494
+INFO     2025-09-10 14:51:53,160 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:53,160 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:53,161 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:53,161 console_span_processor:62 telemetry:  21:51:53.152 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:51:53,162 console_span_processor:67 telemetry: value: 89
+INFO     2025-09-10 14:51:53,162 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:53,163 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:53,165 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:53,166 console_span_processor:62 telemetry:  21:51:53.152 [INFO] metric.total_tokens
+INFO     2025-09-10 14:51:53,166 console_span_processor:67 telemetry: value: 1583
+INFO     2025-09-10 14:51:53,167 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:53,167 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:53,167 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:53,183 console_span_processor:39 telemetry: 21:51:53.169 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (4532.11ms)
+INFO     2025-09-10 14:51:53,184 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:53,184 console_span_processor:62 telemetry:  21:51:53.153 [INFO] ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:51:53,185 console_span_processor:28 telemetry: 21:51:53.185 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:53,212 console_span_processor:39 telemetry: 21:51:53.187 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:51:53,213 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:53,232 console_span_processor:39 telemetry: 21:51:53.214 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.34ms)
+INFO     2025-09-10 14:51:53,233 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:53,245 console_span_processor:39 telemetry: 21:51:53.234 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:51:53,246 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,285 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,305 console_span_processor:39 telemetry: 21:51:54.289 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (1102.92ms)
+INFO     2025-09-10 14:51:54,306 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,318 console_span_processor:39 telemetry: 21:51:54.308 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (1121.71ms)
+INFO     2025-09-10 14:51:54,319 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,322 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,342 console_span_processor:39 telemetry: 21:51:54.324 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (1137.83ms)
+INFO     2025-09-10 14:51:54,342 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,351 console_span_processor:39 telemetry: 21:51:54.344 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (1157.96ms)
+INFO     2025-09-10 14:51:54,353 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,353 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,360 console_span_processor:39 telemetry: 21:51:54.355 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (1170.42ms)
+INFO     2025-09-10 14:51:54,360 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,361 console_span_processor:62 telemetry:  21:51:54.293 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,362 console_span_processor:28 telemetry: 21:51:54.362 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:54,366 console_span_processor:39 telemetry: 21:51:54.364 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:51:54,367 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,373 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,380 console_span_processor:39 telemetry: 21:51:54.369 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.28ms)
+INFO     2025-09-10 14:51:54,381 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:54,389 console_span_processor:39 telemetry: 21:51:54.386 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.21ms)
+INFO     2025-09-10 14:51:54,390 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,393 console_span_processor:39 telemetry: 21:51:54.391 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (26.85ms)
+INFO     2025-09-10 14:51:54,394 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,425 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,438 console_span_processor:39 telemetry: 21:51:54.411 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (47.55ms)
+INFO     2025-09-10 14:51:54,440 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,445 console_span_processor:39 telemetry: 21:51:54.442 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (78.94ms)
+INFO     2025-09-10 14:51:54,446 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,457 uvicorn.access:473 uncategorized: 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,457 console_span_processor:39 telemetry: 21:51:54.452 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (89.01ms)
+INFO     2025-09-10 14:51:54,459 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,468 console_span_processor:39 telemetry: 21:51:54.463 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (100.62ms)
+INFO     2025-09-10 14:51:54,469 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,469 console_span_processor:62 telemetry:  21:51:54.323 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,471 console_span_processor:28 telemetry: 21:51:54.471 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:54,489 console_span_processor:39 telemetry: 21:51:54.474 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.26ms)
+INFO     2025-09-10 14:51:54,490 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,502 console_span_processor:39 telemetry: 21:51:54.495 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (2.67ms)
+INFO     2025-09-10 14:51:54,504 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:54,524 console_span_processor:39 telemetry: 21:51:54.514 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.79ms)
+INFO     2025-09-10 14:51:54,526 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,529 console_span_processor:39 telemetry: 21:51:54.527 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (53.95ms)
+INFO     2025-09-10 14:51:54,537 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,550 console_span_processor:39 telemetry: 21:51:54.542 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (68.87ms)
+INFO     2025-09-10 14:51:54,551 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,557 console_span_processor:39 telemetry: 21:51:54.554 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (81.34ms)
+INFO     2025-09-10 14:51:54,559 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,573 console_span_processor:39 telemetry: 21:51:54.564 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (91.94ms)
+INFO     2025-09-10 14:51:54,574 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,583 console_span_processor:39 telemetry: 21:51:54.575 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (104.63ms)
+INFO     2025-09-10 14:51:54,584 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,584 console_span_processor:62 telemetry:  21:51:54.354 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,585 console_span_processor:28 telemetry: 21:51:54.585 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:54,591 console_span_processor:39 telemetry: 21:51:54.589 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:51:54,591 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,594 console_span_processor:39 telemetry: 21:51:54.592 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:54,595 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:54,599 console_span_processor:39 telemetry: 21:51:54.596 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.16ms)
+INFO     2025-09-10 14:51:54,599 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,604 console_span_processor:39 telemetry: 21:51:54.602 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (13.16ms)
+INFO     2025-09-10 14:51:54,605 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,627 console_span_processor:39 telemetry: 21:51:54.621 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (32.66ms)
+INFO     2025-09-10 14:51:54,634 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,643 console_span_processor:39 telemetry: 21:51:54.637 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (49.21ms)
+INFO     2025-09-10 14:51:54,644 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,654 console_span_processor:39 telemetry: 21:51:54.646 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (57.62ms)
+INFO     2025-09-10 14:51:54,656 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,664 console_span_processor:39 telemetry: 21:51:54.657 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (72.19ms)
+INFO     2025-09-10 14:51:54,666 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,670 console_span_processor:62 telemetry:  21:51:54.374 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,673 console_span_processor:28 telemetry: 21:51:54.673 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:54,687 console_span_processor:39 telemetry: 21:51:54.678 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.38ms)
+INFO     2025-09-10 14:51:54,688 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,697 console_span_processor:39 telemetry: 21:51:54.691 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.21ms)
+INFO     2025-09-10 14:51:54,698 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:54,701 console_span_processor:39 telemetry: 21:51:54.699 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.24ms)
+INFO     2025-09-10 14:51:54,704 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,716 console_span_processor:39 telemetry: 21:51:54.706 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (29.72ms)
+INFO     2025-09-10 14:51:54,718 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,724 console_span_processor:39 telemetry: 21:51:54.721 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (45.99ms)
+INFO     2025-09-10 14:51:54,724 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,727 console_span_processor:39 telemetry: 21:51:54.726 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (51.80ms)
+INFO     2025-09-10 14:51:54,728 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,732 console_span_processor:39 telemetry: 21:51:54.730 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (56.53ms)
+INFO     2025-09-10 14:51:54,732 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,735 console_span_processor:39 telemetry: 21:51:54.734 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (60.83ms)
+INFO     2025-09-10 14:51:54,736 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,736 console_span_processor:62 telemetry:  21:51:54.426 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,737 console_span_processor:28 telemetry: 21:51:54.737 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:51:54,747 console_span_processor:39 telemetry: 21:51:54.740 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:51:54,748 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,752 console_span_processor:39 telemetry: 21:51:54.750 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.22ms)
+INFO     2025-09-10 14:51:54,752 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:51:54,755 console_span_processor:39 telemetry: 21:51:54.754 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.32ms)
+INFO     2025-09-10 14:51:54,756 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:51:54,758 console_span_processor:39 telemetry: 21:51:54.756 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (17.05ms)
+INFO     2025-09-10 14:51:54,761 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.03642994910478592, -0.005766282323747873, -0.028219470754265785, 0.05074828490614891, 0.023913491517305374, -0.056256189942359924,
+         0.0007000428740866482, 0.02684980072081089, 0.005779022816568613, 0.0757804811000824, 0.02611902356147766, -0.04347166419029236,
+         -0.07426290959119797, 0.0033695625606924295, -0.03395790979266167, -0.018579166382551193, 0.031040646135807037, -0.06419522315263748,
+         -0.11664945632219315, 0.011454297229647636, 0.00203332188539207, 0.00506530050188303, -0.05711820721626282, -0.006884930655360222,
+         0.0057834964245557785, 0.005828470923006535, -0.05446002259850502, -0.032767001539468765, -0.023773472756147385, -0.046458274126052856,
+         -0.0579390786588192, 0.052776239812374115, -0.017773080617189407, 0.07396063208580017, -0.007082517724484205, -0.006252078805118799,
+         0.06502724438905716, -0.04769469425082207, 0.08519715070724487, -0.04995475336909294, -0.02040106989443302, -0.024357719346880913, -0....
+INFO     2025-09-10 14:51:54,767 console_span_processor:39 telemetry: 21:51:54.762 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (23.20ms)
+INFO     2025-09-10 14:51:54,767 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:51:54,772 console_span_processor:39 telemetry: 21:51:54.769 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (30.01ms)
+INFO     2025-09-10 14:51:54,773 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,775 console_span_processor:39 telemetry: 21:51:54.774 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (35.79ms)
+INFO     2025-09-10 14:51:54,776 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'shipping time', 'data': [{'file_id': '', 'filename': '', 'score': 0.9984249880472625, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:51:54,778 console_span_processor:39 telemetry: 21:51:54.777 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (40.35ms)
+INFO     2025-09-10 14:51:54,779 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:51:54,779 console_span_processor:62 telemetry:  21:51:54.457 [INFO] 127.0.0.1:56904 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:51:54,781 console_span_processor:28 telemetry: 21:51:54.781 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:54,790 console_span_processor:39 telemetry: 21:51:54.786 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.82ms)
+INFO     2025-09-10 14:51:54,790 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:54,794 console_span_processor:39 telemetry: 21:51:54.792 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.24ms)
+INFO     2025-09-10 14:51:54,794 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:51:54,798 console_span_processor:39 telemetry: 21:51:54.796 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.21ms)
+INFO     2025-09-10 14:51:54,798 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:51:57,872 console_span_processor:39 telemetry: 21:51:57.868 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (3072.99ms)
+INFO     2025-09-10 14:51:57,872 console_span_processor:48 telemetry:     output: {'id': 'oAitcfB-4YNCb4-97d2263f6ff7cfe9', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: Since I couldn\'t get a relevant result from the
+         Llama Stack Vector Store RAG tool, I will provide a general answer to the question "How long does shipping take?".\n\nThe shipping time can
+         vary depending on the destination, shipping method, and other factors. Typically, shipping times can range from a few days to several weeks
+         or even months. Here are some general guidelines:\n\n* Domestic shipping: 1-7 business days\n* International shipping: 7-21 business days or
+         more\n* Express shipping: 1-3 business days\n* Standard shipping: 3-7 business days\n\nPlease note that these are general estimates and the
+         actual shipping time may vary depending on the specific circumstances.\n\nThought: I now can give a great answer\nFinal Answer: The shipping
+         time can vary depending on the destination, shipping method, and other factors. Typically, shipping tim...
+INFO     2025-09-10 14:51:57,875 uvicorn.access:473 uncategorized: ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:51:57,877 console_span_processor:39 telemetry: 21:51:57.876 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (3091.29ms)
+INFO     2025-09-10 14:51:57,878 console_span_processor:48 telemetry:     output: {'id': 'oAitcfB-4YNCb4-97d2263f6ff7cfe9', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: Since I couldn\'t get a relevant result from the
+         Llama Stack Vector Store RAG tool, I will provide a general answer to the question "How long does shipping take?".\n\nThe shipping time can
+         vary depending on the destination, shipping method, and other factors. Typically, shipping times can range from a few days to several weeks
+         or even months. Here are some general guidelines:\n\n* Domestic shipping: 1-7 business days\n* International shipping: 7-21 business days or
+         more\n* Express shipping: 1-3 business days\n* Standard shipping: 3-7 business days\n\nPlease note that these are general estimates and the
+         actual shipping time may vary depending on the specific circumstances.\n\nThought: I now can give a great answer\nFinal Answer: The shipping
+         time can vary depending on the destination, shipping method, and other factors. Typically, shipping tim...
+INFO     2025-09-10 14:51:57,880 console_span_processor:62 telemetry:  21:51:57.874 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:51:57,880 console_span_processor:67 telemetry: value: 1948
+INFO     2025-09-10 14:51:57,881 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:57,881 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:57,881 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:57,882 console_span_processor:62 telemetry:  21:51:57.874 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:51:57,882 console_span_processor:67 telemetry: value: 264
+INFO     2025-09-10 14:51:57,882 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:57,883 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:57,883 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:57,884 console_span_processor:62 telemetry:  21:51:57.874 [INFO] metric.total_tokens
+INFO     2025-09-10 14:51:57,884 console_span_processor:67 telemetry: value: 2212
+INFO     2025-09-10 14:51:57,884 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:51:57,885 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:51:57,886 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:51:57,890 console_span_processor:39 telemetry: 21:51:57.887 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (3106.16ms)
+INFO     2025-09-10 14:51:57,891 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:51:57,891 console_span_processor:62 telemetry:  21:51:57.875 [INFO] ::1:56900 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:55:09,448 console_span_processor:28 telemetry: 21:55:09.448 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:09,463 console_span_processor:39 telemetry: 21:55:09.453 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:55:09,465 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:09,478 console_span_processor:39 telemetry: 21:55:09.467 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:55:09,479 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:55:09,483 console_span_processor:39 telemetry: 21:55:09.480 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.20ms)
+INFO     2025-09-10 14:55:09,484 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:11,246 console_span_processor:39 telemetry: 21:55:11.242 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (1762.00ms)
+INFO     2025-09-10 14:55:11,247 console_span_processor:48 telemetry:     output: {'id': 'oAiuccr-62bZhn-97d22b0209009e58', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold":
+         0}', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed':
+         3440355828201570300}], 'created': 1757541309, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 138, 'prompt_tokens': 495, 'total_tokens': 633, ...
+INFO     2025-09-10 14:55:11,249 uvicorn.access:473 uncategorized: ::1:57854 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:55:11,259 console_span_processor:39 telemetry: 21:55:11.250 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (1797.12ms)
+INFO     2025-09-10 14:55:11,260 console_span_processor:48 telemetry:     output: {'id': 'oAiuccr-62bZhn-97d22b0209009e58', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "How long does shipping 
+         take?", I need to search the provided vector store for relevant information. The vector store ID is vs_dab05212-db05-402c-91ef-57e41797406b. 
+         I will use the Llama Stack Vector Store RAG tool to find the most relevant documents.\n\nAction: Llama Stack Vector Store RAG tool\nAction 
+         Input: {"query": "How long does shipping take?", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold":
+         0}', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed':
+         3440355828201570300}], 'created': 1757541309, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion',
+         'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 138, 'prompt_tokens': 495, 'total_tokens': 633...
+INFO     2025-09-10 14:55:11,261 console_span_processor:62 telemetry:  21:55:11.248 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:55:11,261 console_span_processor:67 telemetry: value: 495
+INFO     2025-09-10 14:55:11,262 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:11,262 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:11,263 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:11,263 console_span_processor:62 telemetry:  21:55:11.249 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:55:11,264 console_span_processor:67 telemetry: value: 138
+INFO     2025-09-10 14:55:11,264 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:11,265 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:11,265 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:11,265 console_span_processor:62 telemetry:  21:55:11.249 [INFO] metric.total_tokens
+INFO     2025-09-10 14:55:11,266 console_span_processor:67 telemetry: value: 633
+INFO     2025-09-10 14:55:11,266 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:11,266 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:11,267 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:11,284 console_span_processor:39 telemetry: 21:55:11.268 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (1819.72ms)
+INFO     2025-09-10 14:55:11,285 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:11,287 console_span_processor:62 telemetry:  21:55:11.250 [INFO] ::1:57854 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:55:11,289 console_span_processor:28 telemetry: 21:55:11.289 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:55:11,316 console_span_processor:39 telemetry: 21:55:11.306 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.72ms)
+INFO     2025-09-10 14:55:11,317 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:55:11,325 console_span_processor:39 telemetry: 21:55:11.319 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.33ms)
+INFO     2025-09-10 14:55:11,326 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:55:11,337 console_span_processor:39 telemetry: 21:55:11.328 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.26ms)
+INFO     2025-09-10 14:55:11,339 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:55:11,725 uvicorn.access:473 uncategorized: 127.0.0.1:57863 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:55:11,816 console_span_processor:39 telemetry: 21:55:11.712 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (407.13ms)
+INFO     2025-09-10 14:55:11,817 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [0.0018660365603864193, -0.008432421833276749, -0.009860510006546974, 0.07527908682823181, -0.008166038431227207, -0.08819602429866791,
+         -0.025107912719249725, -0.029266025871038437, 0.014401894062757492, 0.06447950005531311, 0.04112009331583977, -0.04028748720884323,
+         -0.02221696823835373, -0.009235711768269539, -0.04810388386249542, -0.02784842811524868, 0.016442419961094856, -0.07583750039339066,
+         -0.06627479195594788, 0.01860453188419342, 0.0008918081293813884, 0.011179491877555847, -0.048671603202819824, -0.021989954635500908,
+         0.04990514740347862, -0.003409573109820485, -0.06727275997400284, -0.054264623671770096, -0.022502778097987175, -0.03865018114447594,
+         -0.0471607930958271, 0.01671769842505455, 0.024404950439929962, 0.09877222776412964, -0.01660928875207901, 0.01298862136900425,
+         0.02916400320827961, -0.0722617655992508, 0.041335687041282654, -0.037857454270124435, -0.02524978667497635, -0.010434110648930073, -0....
+INFO     2025-09-10 14:55:11,821 console_span_processor:39 telemetry: 21:55:11.818 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (515.05ms)
+INFO     2025-09-10 14:55:11,822 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Acme ships globally in 3-5 business days.',
+         'metadata': {'document_id': 'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12, 'metadata_token_count': 1}, 'embedding':
+         [-0.012346820905804634, -0.04591671749949455, 0.00545357633382082, 0.0767817348241806, -0.01030217856168747, -0.07128783315420151,
+         -0.047789815813302994, -0.0042580426670610905, -0.08333089202642441, 0.03493816778063774, 0.03678377717733383, -0.022125769406557083,
+         -0.001785362372174859, 0.03860057145357132, 0.017692940309643745, -0.011320478282868862, 0.020480643957853317, -0.14444826543331146,
+         -0.03509785607457161, -0.08347956091165543, 0.01723267324268818, -0.020868107676506042, -0.04816542565822601, -0.04063615947961807,
+         -0.02612607553601265, -0.06816241890192032, -0.021907860413193703, -0.05328420177102089, 0.0035953382030129433, -0.060775868594646454,
+         -0.07895789295434952, 0.08725343644618988, -0.004096562508493662, 0.07713106274604797, 0.04764929413795471, 0.03390089422464371, -0.03...
+INFO     2025-09-10 14:55:11,831 console_span_processor:39 telemetry: 21:55:11.824 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (524.43ms)
+INFO     2025-09-10 14:55:11,832 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:55:11,837 console_span_processor:39 telemetry: 21:55:11.834 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (535.84ms)
+INFO     2025-09-10 14:55:11,838 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query': 'How 
+         long does shipping take?', 'data': [{'file_id': '', 'filename': '', 'score': 1.0150761400159014, 'attributes': {'document_id':
+         'file-489db9aae0424745960e3408ff0f477f', 'token_count': 12.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Acme ships 
+         globally in 3-5 business days.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:55:11,843 console_span_processor:39 telemetry: 21:55:11.840 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (550.67ms)
+INFO     2025-09-10 14:55:11,844 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:55:11,844 console_span_processor:62 telemetry:  21:55:11.728 [INFO] 127.0.0.1:57863 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:55:11,845 console_span_processor:28 telemetry: 21:55:11.845 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:11,848 console_span_processor:39 telemetry: 21:55:11.846 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:55:11,849 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:11,866 console_span_processor:39 telemetry: 21:55:11.850 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.33ms)
+INFO     2025-09-10 14:55:11,866 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:55:11,893 console_span_processor:39 telemetry: 21:55:11.868 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.18ms)
+INFO     2025-09-10 14:55:11,894 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:12,290 console_span_processor:39 telemetry: 21:55:12.288 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (421.30ms)
+INFO     2025-09-10 14:55:12,291 console_span_processor:48 telemetry:     output: {'id': 'oAiudJu-62bZhn-97d22b1098a9ed39', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I now know the final answer\nFinal Answer: Acme 
+         ships globally in 3-5 business days.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None,
+         'tool_calls': []}, 'seed': 9970525031584217000}], 'created': 1757541312, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object':
+         'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 25, 'prompt_tokens': 655, 'total_tokens':
+         680, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 512}, 'prompt': []}
+INFO     2025-09-10 14:55:12,297 uvicorn.access:473 uncategorized: ::1:57854 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:55:12,324 console_span_processor:39 telemetry: 21:55:12.298 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (451.81ms)
+INFO     2025-09-10 14:55:12,326 console_span_processor:48 telemetry:     output: {'id': 'oAiudJu-62bZhn-97d22b1098a9ed39', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I now know the final answer\nFinal Answer: Acme 
+         ships globally in 3-5 business days.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None,
+         'tool_calls': None}, 'seed': 9970525031584217000}], 'created': 1757541312, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object':
+         'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 25, 'prompt_tokens': 655, 'total_tokens':
+         680, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 512}, 'prompt': [], 'metrics': [{'trace_id':
+         'b0fe401af1dce917b7278f7a651b67bc', 'span_id': '7130a92e6a6a8fba', 'timestamp': '2025-09-10T21:55:12.297305Z', 'attributes': {'model_id':
+         'together/meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together'}, 'type': 'metric', 'metric': 'prompt_toke...
+INFO     2025-09-10 14:55:12,327 console_span_processor:62 telemetry:  21:55:12.297 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:55:12,328 console_span_processor:67 telemetry: value: 655
+INFO     2025-09-10 14:55:12,328 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:12,329 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:12,329 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:12,329 console_span_processor:62 telemetry:  21:55:12.297 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:55:12,330 console_span_processor:67 telemetry: value: 25
+INFO     2025-09-10 14:55:12,330 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:12,331 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:12,331 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:12,332 console_span_processor:62 telemetry:  21:55:12.297 [INFO] metric.total_tokens
+INFO     2025-09-10 14:55:12,332 console_span_processor:67 telemetry: value: 680
+INFO     2025-09-10 14:55:12,332 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:12,333 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:12,333 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:12,353 console_span_processor:39 telemetry: 21:55:12.335 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (489.36ms)
+INFO     2025-09-10 14:55:12,355 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:12,358 console_span_processor:62 telemetry:  21:55:12.298 [INFO] ::1:57854 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:55:19,314 console_span_processor:28 telemetry: 21:55:19.314 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:19,320 console_span_processor:39 telemetry: 21:55:19.316 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.15ms)
+INFO     2025-09-10 14:55:19,322 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:19,325 console_span_processor:39 telemetry: 21:55:19.323 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.17ms)
+INFO     2025-09-10 14:55:19,326 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:55:19,331 console_span_processor:39 telemetry: 21:55:19.327 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.23ms)
+INFO     2025-09-10 14:55:19,332 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:21,608 console_span_processor:39 telemetry: 21:55:21.606 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (2279.25ms)
+INFO     2025-09-10 14:55:21,610 console_span_processor:48 telemetry:     output: {'id': 'oAiufav-4YNCb4-97d22b3f6cb27af2', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "Can I return a product 
+         after 40 days?", I need to search the vector store to find relevant information about the return policy. I will use the Llama Stack Vector 
+         Store RAG tool to search the provided vector store ID.\n\nAction: Llama Stack Vector Store RAG tool\nAction Input: {"query": "return policy 
+         after 40 days", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold": 0.5}', 'refusal': None, 'role':
+         'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': []}, 'seed': 17340032880891308000}], 'created':
+         1757541319, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint':
+         None, 'usage': {'completion_tokens': 124, 'prompt_tokens': 499, 'total_tokens': 623, 'completion_tokens_details': No...
+INFO     2025-09-10 14:55:21,611 uvicorn.access:473 uncategorized: ::1:57884 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:55:21,631 console_span_processor:39 telemetry: 21:55:21.612 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (2297.16ms)
+INFO     2025-09-10 14:55:21,632 console_span_processor:48 telemetry:     output: {'id': 'oAiufav-4YNCb4-97d22b3f6cb27af2', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: To answer the question "Can I return a product 
+         after 40 days?", I need to search the vector store to find relevant information about the return policy. I will use the Llama Stack Vector 
+         Store RAG tool to search the provided vector store ID.\n\nAction: Llama Stack Vector Store RAG tool\nAction Input: {"query": "return policy 
+         after 40 days", "vector_store_id": "vs_dab05212-db05-402c-91ef-57e41797406b", "top_k": 1, "score_threshold": 0.5}', 'refusal': None, 'role':
+         'assistant', 'annotations': None, 'audio': None, 'function_call': None, 'tool_calls': None}, 'seed': 17340032880891308000}], 'created':
+         1757541319, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object': 'chat.completion', 'service_tier': None, 'system_fingerprint':
+         None, 'usage': {'completion_tokens': 124, 'prompt_tokens': 499, 'total_tokens': 623, 'completion_tokens_details': ...
+INFO     2025-09-10 14:55:21,636 console_span_processor:62 telemetry:  21:55:21.611 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:55:21,637 console_span_processor:67 telemetry: value: 499
+INFO     2025-09-10 14:55:21,638 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:21,638 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:21,639 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:21,639 console_span_processor:62 telemetry:  21:55:21.611 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:55:21,639 console_span_processor:67 telemetry: value: 124
+INFO     2025-09-10 14:55:21,640 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:21,640 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:21,641 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:21,641 console_span_processor:62 telemetry:  21:55:21.611 [INFO] metric.total_tokens
+INFO     2025-09-10 14:55:21,641 console_span_processor:67 telemetry: value: 623
+INFO     2025-09-10 14:55:21,642 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:21,642 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:21,642 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:21,648 console_span_processor:39 telemetry: 21:55:21.643 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (2329.06ms)
+INFO     2025-09-10 14:55:21,649 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:21,650 console_span_processor:62 telemetry:  21:55:21.612 [INFO] ::1:57884 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:55:21,651 console_span_processor:28 telemetry: 21:55:21.651 [START] /v1/openai/v1/vector_stores/{vector_store_id}/search
+INFO     2025-09-10 14:55:21,674 console_span_processor:39 telemetry: 21:55:21.654 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.21ms)
+INFO     2025-09-10 14:55:21,675 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:55:21,685 console_span_processor:39 telemetry: 21:55:21.678 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.70ms)
+INFO     2025-09-10 14:55:21,687 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.inline.inference.sentence_transformers.sentence_transformers.SentenceTransformersInferenceImpl object at 0x158e83350>
+INFO     2025-09-10 14:55:21,692 console_span_processor:39 telemetry: 21:55:21.689 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.53ms)
+INFO     2025-09-10 14:55:21,692 console_span_processor:48 telemetry:     output: {'identifier': 'sentence-transformers/all-MiniLM-L6-v2',
+         'provider_resource_id': 'all-MiniLM-L6-v2', 'provider_id': 'sentence-transformers', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {'embedding_dimension': 384}, 'model_type': 'embedding'}
+INFO     2025-09-10 14:55:22,019 uvicorn.access:473 uncategorized: 127.0.0.1:57888 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:55:22,024 console_span_processor:39 telemetry: 21:55:22.017 [END] InferenceRouter.openai_embeddings [StatusCode.OK] (363.31ms)
+INFO     2025-09-10 14:55:22,025 console_span_processor:48 telemetry:     output: {'object': 'list', 'data': [{'object': 'embedding', 'embedding':
+         [-0.0018892501248046756, 0.030758941546082497, 0.10938595235347748, 0.04589828848838806, 0.0006913804681971669, 0.014387426897883415,
+         -0.015357869677245617, -0.011600838042795658, -0.08975677192211151, 0.053308334201574326, 0.0744965448975563, 0.01633235439658165,
+         0.010754235088825226, 0.004617820493876934, 0.009704680182039738, -0.03412426635622978, -0.04056152328848839, -0.019626466557383537,
+         -0.08569952845573425, -0.043116409331560135, 0.025109080597758293, -0.03337268903851509, -0.06306783854961395, 0.021730247884988785,
+         0.01859521120786667, 0.0934692993760109, -0.02332254685461521, 0.03396535664796829, -0.04177931323647499, -0.011372999288141727,
+         0.015714162960648537, -0.04297734424471855, -0.06080453842878342, -0.05970568209886551, -0.013541588559746742, 0.04719769209623337,
+         -0.1263798028230667, -0.09081613272428513, -0.044160086661577225, 0.010452632792294025, -0.04007401689887047, 0.029781267046928406,
+         -0.03511...
+INFO     2025-09-10 14:55:22,031 console_span_processor:39 telemetry: 21:55:22.027 [END] FaissVectorIOAdapter.query_chunks [StatusCode.OK] (373.68ms)
+INFO     2025-09-10 14:55:22,032 console_span_processor:48 telemetry:     output: {'chunks': [{'content': 'Returns are accepted within 30 days of 
+         purchase.', 'metadata': {'document_id': 'file-b2f38b0e164347f5a2b6bbe211e33ff3', 'token_count': 10, 'metadata_token_count': 1}, 'embedding':
+         [-0.050018310546875, 0.0007208543829619884, 0.048539839684963226, 0.013606906868517399, -0.006504438817501068, 0.004046895541250706,
+         -0.04984626919031143, -8.024782437132671e-05, -0.02159256860613823, 0.0015519573353230953, 0.10069309175014496, 0.04437215253710747,
+         -0.022335151210427284, -0.05230937898159027, -0.028983373194932938, -0.049531035125255585, 0.03074926696717739, -0.03056054376065731,
+         0.021489247679710388, 0.016868554055690765, -0.028984908014535904, -0.07053273916244507, -0.022961033508181572, -0.0717858299612999,
+         0.0674479678273201, 0.021412895992398262, -0.06054700165987015, 0.021910998970270157, -0.023192649707198143, -0.028992125764489174,
+         0.025179730728268623, -0.006338999141007662, -0.001874048844911158, 0.009305929765105247, 0.011422501876950264, 0.0376049801707...
+INFO     2025-09-10 14:55:22,057 console_span_processor:39 telemetry: 21:55:22.037 [END] VectorDBsRoutingTable.openai_search_vector_store
+         [StatusCode.OK] (384.28ms)
+INFO     2025-09-10 14:55:22,058 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'return policy after 40 days', 'data': [{'file_id': '', 'filename': '', 'score': 1.3987585167097674, 'attributes': {'document_id':
+         'file-b2f38b0e164347f5a2b6bbe211e33ff3', 'token_count': 10.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Returns are
+         accepted within 30 days of purchase.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:55:22,063 console_span_processor:39 telemetry: 21:55:22.060 [END] VectorIORouter.openai_search_vector_store [StatusCode.OK]
+         (407.47ms)
+INFO     2025-09-10 14:55:22,064 console_span_processor:48 telemetry:     output: {'object': 'vector_store.search_results.page', 'search_query':
+         'return policy after 40 days', 'data': [{'file_id': '', 'filename': '', 'score': 1.3987585167097674, 'attributes': {'document_id':
+         'file-b2f38b0e164347f5a2b6bbe211e33ff3', 'token_count': 10.0, 'metadata_token_count': 1.0}, 'content': [{'type': 'text', 'text': 'Returns are
+         accepted within 30 days of purchase.'}]}], 'has_more': False, 'next_page': None}
+INFO     2025-09-10 14:55:22,175 console_span_processor:39 telemetry: 21:55:22.096 [END] /v1/openai/v1/vector_stores/{vector_store_id}/search
+         [StatusCode.OK] (444.58ms)
+INFO     2025-09-10 14:55:22,176 console_span_processor:48 telemetry:     raw_path:
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search
+INFO     2025-09-10 14:55:22,178 console_span_processor:62 telemetry:  21:55:22.021 [INFO] 127.0.0.1:57888 - "POST 
+         /v1/openai/v1/vector_stores/vs_dab05212-db05-402c-91ef-57e41797406b/search HTTP/1.1" 200
+INFO     2025-09-10 14:55:22,186 console_span_processor:28 telemetry: 21:55:22.186 [START] /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:22,195 console_span_processor:39 telemetry: 21:55:22.188 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.30ms)
+INFO     2025-09-10 14:55:22,196 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:22,218 console_span_processor:39 telemetry: 21:55:22.197 [END] ModelsRoutingTable.get_provider_impl [StatusCode.OK] (0.20ms)
+INFO     2025-09-10 14:55:22,220 console_span_processor:48 telemetry:     output:
+         <llama_stack.providers.remote.inference.together.together.TogetherInferenceAdapter object at 0x137069220>
+INFO     2025-09-10 14:55:22,227 console_span_processor:39 telemetry: 21:55:22.224 [END] ModelsRoutingTable.get_model [StatusCode.OK] (0.19ms)
+INFO     2025-09-10 14:55:22,228 console_span_processor:48 telemetry:     output: {'identifier': 'together/meta-llama/Llama-3.3-70B-Instruct-Turbo',
+         'provider_resource_id': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together', 'type': 'model', 'owner': None, 'source':
+         'listed_from_provider', 'metadata': {}, 'model_type': 'llm'}
+INFO     2025-09-10 14:55:22,805 console_span_processor:39 telemetry: 21:55:22.801 [END] TogetherInferenceAdapter.openai_chat_completion
+         [StatusCode.OK] (577.41ms)
+INFO     2025-09-10 14:55:22,806 console_span_processor:48 telemetry:     output: {'id': 'oAiugR1-62bZhn-97d22b51096017f0', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I now know the final answer\nFinal Answer: Returns 
+         are accepted within 30 days of purchase.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None,
+         'tool_calls': []}, 'seed': 2982499907717596700}], 'created': 1757541322, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object':
+         'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 23, 'prompt_tokens': 643, 'total_tokens':
+         666, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 256}, 'prompt': []}
+INFO     2025-09-10 14:55:22,834 uvicorn.access:473 uncategorized: ::1:57884 - "POST /v1/openai/v1/chat/completions HTTP/1.1" 200
+INFO     2025-09-10 14:55:22,873 console_span_processor:39 telemetry: 21:55:22.836 [END] InferenceRouter.openai_chat_completion [StatusCode.OK]
+         (648.69ms)
+INFO     2025-09-10 14:55:22,874 console_span_processor:48 telemetry:     output: {'id': 'oAiugR1-62bZhn-97d22b51096017f0', 'choices':
+         [{'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Thought: I now know the final answer\nFinal Answer: Returns 
+         are accepted within 30 days of purchase.', 'refusal': None, 'role': 'assistant', 'annotations': None, 'audio': None, 'function_call': None,
+         'tool_calls': None}, 'seed': 2982499907717596700}], 'created': 1757541322, 'model': 'meta-llama/Llama-3.3-70B-Instruct-Turbo', 'object':
+         'chat.completion', 'service_tier': None, 'system_fingerprint': None, 'usage': {'completion_tokens': 23, 'prompt_tokens': 643, 'total_tokens':
+         666, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'cached_tokens': 256}, 'prompt': [], 'metrics': [{'trace_id':
+         'd588c782bd804c0cb49e43ac8c028e7a', 'span_id': '396bc114a416fc6c', 'timestamp': '2025-09-10T21:55:22.832220Z', 'attributes': {'model_id':
+         'together/meta-llama/Llama-3.3-70B-Instruct-Turbo', 'provider_id': 'together'}, 'type': 'metric', 'metric': 'prom...
+INFO     2025-09-10 14:55:22,876 console_span_processor:62 telemetry:  21:55:22.832 [INFO] metric.prompt_tokens
+INFO     2025-09-10 14:55:22,877 console_span_processor:67 telemetry: value: 643
+INFO     2025-09-10 14:55:22,878 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:22,878 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:22,879 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:22,879 console_span_processor:62 telemetry:  21:55:22.832 [INFO] metric.completion_tokens
+INFO     2025-09-10 14:55:22,880 console_span_processor:67 telemetry: value: 23
+INFO     2025-09-10 14:55:22,881 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:22,881 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:22,882 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:22,882 console_span_processor:62 telemetry:  21:55:22.832 [INFO] metric.total_tokens
+INFO     2025-09-10 14:55:22,883 console_span_processor:67 telemetry: value: 666
+INFO     2025-09-10 14:55:22,886 console_span_processor:67 telemetry: unit: tokens
+INFO     2025-09-10 14:55:22,887 console_span_processor:67 telemetry: model_id: together/meta-llama/Llama-3.3-70B-Instruct-Turbo
+INFO     2025-09-10 14:55:22,888 console_span_processor:67 telemetry: provider_id: together
+INFO     2025-09-10 14:55:22,913 console_span_processor:39 telemetry: 21:55:22.891 [END] /v1/openai/v1/chat/completions [StatusCode.OK] (704.86ms)
+INFO     2025-09-10 14:55:22,915 console_span_processor:48 telemetry:     raw_path: /v1/openai/v1/chat/completions
+INFO     2025-09-10 14:55:22,915 console_span_processor:62 telemetry:  21:55:22.835 [INFO] ::1:57884 - "POST /v1/openai/v1/chat/completions HTTP/1.1"
+         200
+INFO     2025-09-10 14:57:34,753 uvicorn.error:264 uncategorized: Shutting down
+INFO     2025-09-10 14:57:34,867 uvicorn.error:67 uncategorized: Waiting for application shutdown.
+INFO     2025-09-10 14:57:34,868 __main__:170 core::server: Shutting down
+INFO     2025-09-10 14:57:34,868 llama_stack.core.stack:357 core: Shutting down ModelsRoutingTable
+INFO     2025-09-10 14:57:34,869 llama_stack.core.stack:357 core: Shutting down DatasetsRoutingTable
+INFO     2025-09-10 14:57:34,870 llama_stack.core.stack:357 core: Shutting down DatasetIORouter
+INFO     2025-09-10 14:57:34,870 llama_stack.core.stack:357 core: Shutting down TelemetryAdapter
+INFO     2025-09-10 14:57:34,871 llama_stack.core.stack:357 core: Shutting down InferenceRouter
+INFO     2025-09-10 14:57:34,874 llama_stack.core.stack:357 core: Shutting down LocalfsFilesImpl
+INFO     2025-09-10 14:57:34,875 llama_stack.core.stack:357 core: Shutting down ShieldsRoutingTable
+INFO     2025-09-10 14:57:34,876 llama_stack.core.stack:357 core: Shutting down SafetyRouter
+INFO     2025-09-10 14:57:34,877 llama_stack.core.stack:357 core: Shutting down VectorDBsRoutingTable
+INFO     2025-09-10 14:57:34,877 llama_stack.core.stack:357 core: Shutting down VectorIORouter
+INFO     2025-09-10 14:57:34,878 llama_stack.core.stack:357 core: Shutting down ToolGroupsRoutingTable
+INFO     2025-09-10 14:57:34,878 llama_stack.core.stack:357 core: Shutting down ToolRuntimeRouter
+INFO     2025-09-10 14:57:34,879 llama_stack.core.stack:357 core: Shutting down MetaReferenceAgentsImpl
+INFO     2025-09-10 14:57:34,879 llama_stack.core.stack:357 core: Shutting down TorchtunePostTrainingImpl
+INFO     2025-09-10 14:57:34,881 llama_stack.core.stack:357 core: Shutting down ScoringFunctionsRoutingTable
+INFO     2025-09-10 14:57:34,882 llama_stack.core.stack:357 core: Shutting down ScoringRouter
+INFO     2025-09-10 14:57:34,882 llama_stack.core.stack:357 core: Shutting down ReferenceBatchesImpl
+INFO     2025-09-10 14:57:34,883 llama_stack.core.stack:357 core: Shutting down BenchmarksRoutingTable
+INFO     2025-09-10 14:57:34,884 llama_stack.core.stack:357 core: Shutting down EvalRouter
+INFO     2025-09-10 14:57:34,884 llama_stack.core.stack:357 core: Shutting down DistributionInspectImpl
+INFO     2025-09-10 14:57:34,885 llama_stack.core.stack:357 core: Shutting down ProviderImpl
+INFO     2025-09-10 14:57:34,886 uvicorn.error:76 uncategorized: Application shutdown complete.
+INFO     2025-09-10 14:57:34,887 uvicorn.error:94 uncategorized: Finished server process [26795]
+/Users/kaiwu/miniconda3/lib/python3.12/multiprocessing/resource_tracker.py:255: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
+  warnings.warn('resource_tracker: There appear to be %d '