From 0027c5b61434fe098c2ee0fef746c37deb5f1532 Mon Sep 17 00:00:00 2001
From: Sanyam Bhutani
Date: Thu, 21 Nov 2024 06:53:47 -0800
Subject: [PATCH] update nb-1

---
 zero_to_hero_guide/00_Inference101.ipynb | 26 ++++++++++++++----------
 zero_to_hero_guide/quickstart.md         |  7 +++++++
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/zero_to_hero_guide/00_Inference101.ipynb b/zero_to_hero_guide/00_Inference101.ipynb
index 4da0d0df1..eb2816feb 100644
--- a/zero_to_hero_guide/00_Inference101.ipynb
+++ b/zero_to_hero_guide/00_Inference101.ipynb
@@ -85,7 +85,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "77c29dba",
    "metadata": {},
    "outputs": [
@@ -104,7 +104,8 @@
     "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
     "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
     "    ],\n",
-    "    model='Llama3.2-11B-Vision-Instruct',\n",
+    "    model_id='Llama3.2-11B-Vision-Instruct',\n",
+    "    stream=True\n",
     ")\n",
     "\n",
     "print(response.completion_message.content)"
@@ -124,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "5c6812da",
    "metadata": {},
    "outputs": [
@@ -143,7 +144,8 @@
     "        {\"role\": \"system\", \"content\": \"You are shakespeare.\"},\n",
     "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
     "    ],\n",
-    "    model='Llama3.2-11B-Vision-Instruct',\n",
+    "    model_id='Llama3.2-11B-Vision-Instruct',\n",
+    "    stream=True\n",
     ")\n",
     "\n",
     "print(response.completion_message.content)"
@@ -226,7 +228,8 @@
     "        message = {\"role\": \"user\", \"content\": user_input}\n",
     "        response = client.inference.chat_completion(\n",
     "            messages=[message],\n",
-    "            model='Llama3.2-11B-Vision-Instruct',\n",
+    "            model_id='Llama3.2-11B-Vision-Instruct',\n",
+    "            stream=True\n",
     "        )\n",
     "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
     "\n",
@@ -274,7 +277,8 @@
     "\n",
     "        response = client.inference.chat_completion(\n",
     "            messages=conversation_history,\n",
-    "            model='Llama3.2-11B-Vision-Instruct',\n",
+    "            model_id='Llama3.2-11B-Vision-Instruct',\n",
+    "            stream=True\n",
     "        )\n",
     "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
     "\n",
@@ -299,7 +303,7 @@
    "source": [
     "## Streaming Responses\n",
     "\n",
-    "Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed."
+    "Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed. You can set this `bool` value to `True` or `False` to enable or disable streaming."
    ]
   },
   {
@@ -322,8 +326,8 @@
     "\n",
     "    response = client.inference.chat_completion(\n",
     "        messages=[message],\n",
-    "        model='Llama3.2-11B-Vision-Instruct',\n",
-    "        stream=stream,\n",
+    "        model_id='Llama3.2-11B-Vision-Instruct',\n",
+    "        stream=True\n",
     "    )\n",
     "\n",
     "    if not stream:\n",
@@ -341,7 +345,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -355,7 +359,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.15"
+   "version": "3.12.2"
   }
  },
  "nbformat": 4,
diff --git a/zero_to_hero_guide/quickstart.md b/zero_to_hero_guide/quickstart.md
index 107bf0d12..c18d0ff03 100644
--- a/zero_to_hero_guide/quickstart.md
+++ b/zero_to_hero_guide/quickstart.md
@@ -102,6 +102,13 @@ If you're looking for more specific topics like tool calling or agent setup, we
    llama stack build --template ollama --image-type conda
    ```
 
+After this step, you will see console output like the following:
+```
+Build Successful! Next steps:
+  1. Set the environment variables: LLAMASTACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL
+  2. `llama stack run /Users/username/.llama/distributions/llamastack-ollama/ollama-run.yaml`
+```
+
 2. **Edit Configuration**:
    - Modify the `ollama-run.yaml` file located at `/Users/yourusername/.llama/distributions/llamastack-ollama/ollama-run.yaml`:
    - Change the `chromadb` port to `8000`.
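A note on the `stream=True` calls introduced by this patch: when streaming is enabled, `chat_completion` yields response chunks incrementally instead of returning a single `completion_message`. The sketch below shows one way such a stream might be consumed. It is illustrative rather than part of the patch; it assumes the `llama_stack_client` Python package, a Llama Stack server reachable on localhost (the port is an assumption), and a `chunk.event.delta` attribute path that may differ between client versions.

```python
# Illustrative sketch, not part of the patch above.
# Assumes a Llama Stack server is running at base_url (the port is an assumption)
# and that streamed chunks expose their text via `chunk.event.delta`, which may
# vary between llama_stack_client versions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

response = client.inference.chat_completion(
    messages=[{"role": "user", "content": "Write a two-sentence poem about llama."}],
    model_id="Llama3.2-11B-Vision-Instruct",
    stream=True,
)

# With stream=True, the call returns an iterator of event chunks rather than a
# single completion_message, so the text is printed as it arrives.
for chunk in response:
    print(chunk.event.delta, end="", flush=True)
print()
```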