diff --git a/docs/docs/getting_started/demo_script.py b/docs/docs/getting_started/demo_script.py
index fcae51d86..a2f72749f 100644
--- a/docs/docs/getting_started/demo_script.py
+++ b/docs/docs/getting_started/demo_script.py
@@ -24,16 +24,12 @@ from openai import OpenAI
 # Initialize OpenAI client pointing to Llama Stack server
 client = OpenAI(base_url="http://localhost:8321/v1/", api_key="none")
 
-# Shared setup: Create vector store and upload document
-print("=" * 80)
-print("SETUP: Creating vector store and uploading document")
-print("=" * 80)
+print("RAG demonstration\n")
 
 url = "https://www.paulgraham.com/greatwork.html"
 print(f"Fetching document from: {url}")
 
 vs = client.vector_stores.create()
-print(f"Vector store created: {vs.id}")
 
 response = requests.get(url)
 pseudo_file = io.BytesIO(str(response.content).encode("utf-8"))
@@ -44,15 +40,19 @@ client.vector_stores.files.create(vector_store_id=vs.id, file_id=uploaded_file.i
 print(f"File uploaded and added to vector store: {uploaded_file.id}")
 
 query = "How do you do great work?"
-
-# ============================================================================
-# APPROACH 1: Responses API (Recommended for most use cases)
-# ============================================================================
-print("\n" + "=" * 80)
-print("APPROACH 1: Responses API (Automatic Tool Calling)")
-print("=" * 80)
 print(f"Query: {query}\n")
 
+print(
+    """
+RAG using Responses API:
+  - Automatic tool calling (model decides when to search)
+  - Simpler code, less control
+  - Best for: Conversational agents, automatic workflows
+
+"""
+)
+
+print("Reply via Responses API:\n")
 resp = client.responses.create(
     model=os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b"),
     input=query,
@@ -60,39 +60,36 @@ resp = client.responses.create(
     include=["file_search_call.results"],
 )
 
-print("Response (Responses API):")
 print("-" * 80)
 print(resp.output[-1].content[-1].text)
 print("-" * 80)
 
-# ============================================================================
-# APPROACH 2: Chat Completions API
-# ============================================================================
-print("\n" + "=" * 80)
-print("APPROACH 2: Chat Completions API (Manual Retrieval)")
-print("=" * 80)
-print(f"Query: {query}\n")
+print(
+    """
+
+RAG using Chat Completions API:
+  - Manual retrieval (you control the search)
+  - More code, more control
+  - Best for: Custom RAG patterns, batch processing, specialized workflows
+"""
+)
 
-# Step 1: Search vector store explicitly
 print("Searching vector store...")
 search_results = client.vector_stores.search(
     vector_store_id=vs.id, query=query, max_num_results=3, rewrite_query=False
 )
 
-# Step 2: Extract context from search results
+# Extract context from search results
 context_chunks = []
 for result in search_results.data:
     # result.content is a list of Content objects, extract the text from each
-    if hasattr(result, "content") and result.content:
-        for content_item in result.content:
-            if hasattr(content_item, "text") and content_item.text:
-                context_chunks.append(content_item.text)
+    for content_item in result.content:
+        context_chunks.append(content_item.text)
 
 context = "\n\n".join(context_chunks)
 print(f"Found {len(context_chunks)} relevant chunks\n")
 
-# Step 3: Use Chat Completions with retrieved context
-print("Generating response with chat completions...")
+print("Reply via Chat Completions API:\n")
 completion = client.chat.completions.create(
     model=os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b"),
     messages=[
@@ -108,29 +105,6 @@ completion = client.chat.completions.create(
     temperature=0.7,
 )
 
-print("Response (Chat Completions API):")
 print("-" * 80)
 print(completion.choices[0].message.content)
 print("-" * 80)
-
-# ============================================================================
-# Summary
-# ============================================================================
-print("\n" + "=" * 80)
-print("SUMMARY")
-print("=" * 80)
-print(
-    """
-Both approaches successfully performed RAG:
-
-1. Responses API:
-   - Automatic tool calling (model decides when to search)
-   - Simpler code, less control
-   - Best for: Conversational agents, automatic workflows
-
-2. Chat Completions API:
-   - Manual retrieval (you control the search)
-   - More code, more control
-   - Best for: Custom RAG patterns, batch processing, specialized workflows
-"""
-)
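
For reference, below is a minimal sketch of the manual-retrieval path as the script reads after this change. It assumes the setup from the script above (client, the vector store vs, and query); the messages payload is illustrative only, since the script's actual prompt falls outside the hunks shown here.

# Sketch only: assumes client, vs, and query from the setup above; the
# prompt wording below is hypothetical, not the script's actual messages.
import os

# Search the vector store explicitly (you control retrieval).
search_results = client.vector_stores.search(
    vector_store_id=vs.id, query=query, max_num_results=3, rewrite_query=False
)

# Flatten the returned Content objects into one context string.
context = "\n\n".join(
    content_item.text
    for result in search_results.data
    for content_item in result.content
)

# Generate with the retrieved context embedded in the prompt.
completion = client.chat.completions.create(
    model=os.getenv("INFERENCE_MODEL", "ollama/llama3.2:3b"),
    messages=[
        # Hypothetical prompt wording:
        {"role": "system", "content": "Answer using only the provided context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
    ],
    temperature=0.7,
)
print(completion.choices[0].message.content)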