From 1ec7216c3ff669eff09f570c1da6bfe1c1283478 Mon Sep 17 00:00:00 2001
From: ehhuang <ehhuang@users.noreply.github.com>
Date: Tue, 21 Oct 2025 11:33:23 -0700
Subject: [PATCH] chore: update quick_start (#3878)

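# What does this PR do?

Updates `docs/quick_start.ipynb`:

* Adds the missing closing quote and trailing comma to the
  `uv run --with llama-stack llama stack run starter` command string
  passed to `subprocess.Popen`.
* Replaces the `client.vector_dbs.register` /
  `client.tool_runtime.rag_tool.insert` ingestion flow with
  `client.files.create` + `client.vector_stores.create`, and switches the
  agent tool from `builtin::rag/knowledge_search` to `file_search`.
* Prints agent events with `print(log, end="")` instead of `log.print()`.
* Refreshes the recorded cell outputs, execution counts, and kernel
  metadata (Python 3.12.12).

## Test Plan

Re-executed the notebook cells; the refreshed outputs are included in this
change.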
---
 docs/quick_start.ipynb | 111 ++++++++++++++++++++++++++---------------
 1 file changed, 72 insertions(+), 39 deletions(-)

diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb
index 4ddde693f..0d5ad227c 100644
--- a/docs/quick_start.ipynb
+++ b/docs/quick_start.ipynb
@@ -126,17 +126,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "id": "J2kGed0R5PSf",
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
-    "collapsed": true,
     "id": "J2kGed0R5PSf",
     "outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m52 packages\u001b[0m \u001b[2min 1.56s\u001b[0m\u001b[0m\n",
+      "\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 122ms\u001b[0m\u001b[0m\n",
+      "\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m3 packages\u001b[0m \u001b[2min 197ms\u001b[0m\u001b[0m\n",
+      "\u001b[2mUsing Python 3.12.12 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/test\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 11ms\u001b[0m\u001b[0m\n"
+     ]
+    }
+   ],
    "source": [
     "import os\n",
     "import subprocess\n",
@@ -150,7 +164,7 @@
     "def run_llama_stack_server_background():\n",
     "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
     "    process = subprocess.Popen(\n",
-    "        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\n",
+    "        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\",\n",
     "        shell=True,\n",
     "        stdout=log_file,\n",
     "        stderr=log_file,\n",
@@ -200,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "id": "f779283d",
    "metadata": {},
    "outputs": [
@@ -208,8 +222,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Starting Llama Stack server with PID: 787100\n",
-      "Waiting for server to start\n",
+      "Starting Llama Stack server with PID: 20778\n",
+      "Waiting for server to start........\n",
       "Server is ready!\n"
      ]
     }
@@ -229,65 +243,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "id": "7da71011",
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/files \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/vector_stores \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/conversations \"HTTP/1.1 200 OK\"\n",
+      "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/responses \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "rag_tool> Ingesting document: https://www.paulgraham.com/greatwork.html\n",
       "prompt> How do you do great work?\n",
-      "\u001b[33minference> \u001b[0m\u001b[33m[k\u001b[0m\u001b[33mnowledge\u001b[0m\u001b[33m_search\u001b[0m\u001b[33m(query\u001b[0m\u001b[33m=\"\u001b[0m\u001b[33mWhat\u001b[0m\u001b[33m is\u001b[0m\u001b[33m the\u001b[0m\u001b[33m key\u001b[0m\u001b[33m to\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m\")]\u001b[0m\u001b[97m\u001b[0m\n",
-      "\u001b[32mtool_execution> Tool:knowledge_search Args:{'query': 'What is the key to doing great work'}\u001b[0m\n",
-      "\u001b[32mtool_execution> Tool:knowledge_search Response:[TextContentItem(text='knowledge_search tool found 5 chunks:\\nBEGIN of knowledge_search tool results.\\n', type='text'), TextContentItem(text=\"Result 1:\\nDocument_id:docum\\nContent:  work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 2:\\nDocument_id:docum\\nContent:  work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 3:\\nDocument_id:docum\\nContent:  work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 4:\\nDocument_id:docum\\nContent:  work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text=\"Result 5:\\nDocument_id:docum\\nContent:  work. Doing great work means doing something important\\nso well that you expand people's ideas of what's possible. But\\nthere's no threshold for importance. It's a matter of degree, and\\noften hard to judge at the time anyway.\\n\", type='text'), TextContentItem(text='END of knowledge_search tool results.\\n', type='text'), TextContentItem(text='The above results were retrieved to help answer the user\\'s query: \"What is the key to doing great work\". 
Use them as supporting information only in answering this query.\\n', type='text')]\u001b[0m\n",
-      "\u001b[33minference> \u001b[0m\u001b[33mDoing\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m means\u001b[0m\u001b[33m doing\u001b[0m\u001b[33m something\u001b[0m\u001b[33m important\u001b[0m\u001b[33m so\u001b[0m\u001b[33m well\u001b[0m\u001b[33m that\u001b[0m\u001b[33m you\u001b[0m\u001b[33m expand\u001b[0m\u001b[33m people\u001b[0m\u001b[33m's\u001b[0m\u001b[33m ideas\u001b[0m\u001b[33m of\u001b[0m\u001b[33m what\u001b[0m\u001b[33m's\u001b[0m\u001b[33m possible\u001b[0m\u001b[33m.\u001b[0m\u001b[33m However\u001b[0m\u001b[33m,\u001b[0m\u001b[33m there\u001b[0m\u001b[33m's\u001b[0m\u001b[33m no\u001b[0m\u001b[33m threshold\u001b[0m\u001b[33m for\u001b[0m\u001b[33m importance\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m's\u001b[0m\u001b[33m often\u001b[0m\u001b[33m hard\u001b[0m\u001b[33m to\u001b[0m\u001b[33m judge\u001b[0m\u001b[33m at\u001b[0m\u001b[33m the\u001b[0m\u001b[33m time\u001b[0m\u001b[33m anyway\u001b[0m\u001b[33m.\u001b[0m\u001b[33m Great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m matter\u001b[0m\u001b[33m of\u001b[0m\u001b[33m degree\u001b[0m\u001b[33m,\u001b[0m\u001b[33m and\u001b[0m\u001b[33m it\u001b[0m\u001b[33m can\u001b[0m\u001b[33m be\u001b[0m\u001b[33m difficult\u001b[0m\u001b[33m to\u001b[0m\u001b[33m determine\u001b[0m\u001b[33m whether\u001b[0m\u001b[33m someone\u001b[0m\u001b[33m has\u001b[0m\u001b[33m done\u001b[0m\u001b[33m great\u001b[0m\u001b[33m work\u001b[0m\u001b[33m until\u001b[0m\u001b[33m after\u001b[0m\u001b[33m the\u001b[0m\u001b[33m fact\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n",
-      "\u001b[30m\u001b[0m"
+      "🤔 Doing great work involves a combination of skills, habits, and mindsets. Here are some key principles:\n",
+      "\n",
+      "1. **Set Clear Goals**: Start with a clear vision of what you want to achieve. Define specific, measurable, achievable, relevant, and time-bound (SMART) goals.\n",
+      "\n",
+      "2. **Plan and Prioritize**: Break your goals into smaller, manageable tasks. Prioritize these tasks based on their importance and urgency.\n",
+      "\n",
+      "3. **Focus on Quality**: Aim for high-quality outcomes rather than just finishing tasks. Pay attention to detail, and ensure your work meets or exceeds standards.\n",
+      "\n",
+      "4. **Stay Organized**: Keep your workspace, both physical and digital, organized to help you stay focused and efficient.\n",
+      "\n",
+      "5. **Manage Your Time**: Use time management techniques such as the Pomodoro Technique, time blocking, or the Eisenhower Box to maximize productivity.\n",
+      "\n",
+      "6. **Seek Feedback and Learn**: Regularly seek feedback from peers, mentors, or supervisors. Use constructive criticism to improve continuously.\n",
+      "\n",
+      "7. **Innovate and Improve**: Look for ways to improve processes or introduce new ideas. Be open to change and willing to adapt.\n",
+      "\n",
+      "8. **Stay Motivated and Persistent**: Keep your end goals in mind to stay motivated. Overcome setbacks with resilience and persistence.\n",
+      "\n",
+      "9. **Balance and Rest**: Ensure you maintain a healthy work-life balance. Take breaks and manage stress to sustain long-term productivity.\n",
+      "\n",
+      "10. **Reflect and Adjust**: Regularly assess your progress and adjust your strategies as needed. Reflect on what works well and what doesn't.\n",
+      "\n",
+      "By incorporating these elements, you can consistently produce high-quality work and achieve excellence in your endeavors.\n"
      ]
     }
    ],
    "source": [
     "from llama_stack_client import Agent, AgentEventLogger, RAGDocument, LlamaStackClient\n",
+    "import requests\n",
     "\n",
-    "vector_db_id = \"my_demo_vector_db\"\n",
+    "vector_store_id = \"my_demo_vector_db\"\n",
     "client = LlamaStackClient(base_url=\"http://0.0.0.0:8321\")\n",
     "\n",
     "models = client.models.list()\n",
     "\n",
     "# Select the first ollama and first ollama's embedding model\n",
     "model_id = next(m for m in models if m.model_type == \"llm\" and m.provider_id == \"ollama\").identifier\n",
-    "embedding_model = next(m for m in models if m.model_type == \"embedding\" and m.provider_id == \"ollama\")\n",
-    "embedding_model_id = embedding_model.identifier\n",
-    "embedding_dimension = embedding_model.metadata[\"embedding_dimension\"]\n",
     "\n",
-    "_ = client.vector_dbs.register(\n",
-    "    vector_db_id=vector_db_id,\n",
-    "    embedding_model=embedding_model_id,\n",
-    "    embedding_dimension=embedding_dimension,\n",
-    "    provider_id=\"faiss\",\n",
-    ")\n",
+    "\n",
     "source = \"https://www.paulgraham.com/greatwork.html\"\n",
-    "print(\"rag_tool> Ingesting document:\", source)\n",
-    "document = RAGDocument(\n",
-    "    document_id=\"document_1\",\n",
-    "    content=source,\n",
-    "    mime_type=\"text/html\",\n",
-    "    metadata={},\n",
+    "response = requests.get(source)\n",
+    "file = client.files.create(\n",
+    "    file=response.content,\n",
+    "    purpose='assistants'\n",
     ")\n",
-    "client.tool_runtime.rag_tool.insert(\n",
-    "    documents=[document],\n",
-    "    vector_db_id=vector_db_id,\n",
-    "    chunk_size_in_tokens=50,\n",
+    "vector_store = client.vector_stores.create(\n",
+    "    name=vector_store_id,\n",
+    "    file_ids=[file.id],\n",
     ")\n",
+    "\n",
     "agent = Agent(\n",
     "    client,\n",
     "    model=model_id,\n",
     "    instructions=\"You are a helpful assistant\",\n",
     "    tools=[\n",
     "        {\n",
-    "            \"name\": \"builtin::rag/knowledge_search\",\n",
-    "            \"args\": {\"vector_db_ids\": [vector_db_id]},\n",
+    "            \"type\": \"file_search\",\n",
+    "            \"vector_store_ids\": [vector_store_id],\n",
     "        }\n",
     "    ],\n",
     ")\n",
@@ -302,7 +335,7 @@
     ")\n",
     "\n",
     "for log in AgentEventLogger().log(response):\n",
-    "    log.print()"
+    "    print(log, end=\"\")"
    ]
   },
   {
@@ -344,7 +377,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -358,7 +391,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.12.12"
   }
  },
  "nbformat": 4,