update nb-1

Sanyam Bhutani 2024-11-21 06:53:47 -08:00
parent 06b699f5ec
commit 0027c5b614
2 changed files with 22 additions and 11 deletions

View file

@@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "77c29dba",
"metadata": {},
"outputs": [
@@ -104,7 +104,8 @@
" {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
" {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
" ],\n",
" model='Llama3.2-11B-Vision-Instruct',\n",
" model_id='Llama3.2-11B-Vision-Instruct',\n",
" stream=True\n",
")\n",
"\n",
"print(response.completion_message.content)"
@@ -124,7 +125,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "5c6812da",
"metadata": {},
"outputs": [
@@ -143,7 +144,8 @@
" {\"role\": \"system\", \"content\": \"You are shakespeare.\"},\n",
" {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
" ],\n",
" model='Llama3.2-11B-Vision-Instruct',\n",
" model_id='Llama3.2-11B-Vision-Instruct',\n",
" stream=True\n",
")\n",
"\n",
"print(response.completion_message.content)"
@@ -226,7 +228,8 @@
" message = {\"role\": \"user\", \"content\": user_input}\n",
" response = client.inference.chat_completion(\n",
" messages=[message],\n",
" model='Llama3.2-11B-Vision-Instruct',\n",
" model_id='Llama3.2-11B-Vision-Instruct',\n",
" stream=True\n",
" )\n",
" cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
"\n",
@@ -274,7 +277,8 @@
"\n",
" response = client.inference.chat_completion(\n",
" messages=conversation_history,\n",
" model='Llama3.2-11B-Vision-Instruct',\n",
" model_id='Llama3.2-11B-Vision-Instruct',\n",
" stream=True\n",
" )\n",
" cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
"\n",
@@ -299,7 +303,7 @@
"source": [
"## Streaming Responses\n",
"\n",
"Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed."
"Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed. You can change this `bool` value to `True` or `False` to allow streaming"
]
},
{
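For context, here is a minimal sketch (not part of this commit) of consuming a streamed `chat_completion` response. The base URL is illustrative, and the chunk attribute `event.delta` is an assumption that may vary across `llama_stack_client` versions.

```python
from llama_stack_client import LlamaStackClient

# Illustrative endpoint; point this at your running Llama Stack server.
client = LlamaStackClient(base_url="http://localhost:5000")

response = client.inference.chat_completion(
    messages=[{"role": "user", "content": "Write a two-sentence poem about llama."}],
    model_id="Llama3.2-11B-Vision-Instruct",
    stream=True,
)

# With stream=True the call returns an iterator of chunks rather than a single
# completed message; each chunk carries an incremental piece of the reply.
for chunk in response:
    print(chunk.event.delta, end="", flush=True)
print()
```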
@@ -322,8 +326,8 @@
"\n",
" response = client.inference.chat_completion(\n",
" messages=[message],\n",
" model='Llama3.2-11B-Vision-Instruct',\n",
" stream=stream,\n",
" model_id='Llama3.2-11B-Vision-Instruct',\n",
" stream=True\n",
" )\n",
"\n",
" if not stream:\n",
@@ -341,7 +345,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -355,7 +359,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
"version": "3.12.2"
}
},
"nbformat": 4,

View file

@@ -102,6 +102,13 @@ If you're looking for more specific topics like tool calling or agent setup, we
llama stack build --template ollama --image-type conda
```
After this step, you should see console output like the following (a short Python client sketch follows these setup steps):
```
Build Successful! Next steps:
1. Set the environment variables: LLAMASTACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL
2. `llama stack run /Users/username/.llama/distributions/llamastack-ollama/ollama-run.yaml`
```
2. **Edit Configuration**:
- Modify the `ollama-run.yaml` file located at `/Users/yourusername/.llama/distributions/llamastack-ollama/ollama-run.yaml`:
- Change the `chromadb` port to `8000`.
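Once the build and run steps above succeed, a client can connect to the running distribution. A minimal sketch, assuming the server listens on `LLAMASTACK_PORT` on localhost; the fallback port and model name below are placeholders:

```python
import os

from llama_stack_client import LlamaStackClient

# Read the port exported in step 1; "5000" is only a placeholder fallback.
port = os.environ.get("LLAMASTACK_PORT", "5000")
client = LlamaStackClient(base_url=f"http://localhost:{port}")

# Quick sanity check against the model served by the ollama distribution.
response = client.inference.chat_completion(
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    model_id=os.environ.get("INFERENCE_MODEL", "Llama3.2-11B-Vision-Instruct"),
    stream=False,
)
print(response.completion_message.content)
```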