pr review changes

2025-10-15 22:47:59 +00:00 · 2024-11-08 14:50:44 -08:00 · 2024-11-08 14:50:44 -08:00 · c79c8367b7
commit c79c8367b7
parent 6dd5ea7631
6 changed files with 152 additions and 209 deletions
--- a/docs/zero_to_hero_guide/00_Inference101.ipynb
+++ b/docs/zero_to_hero_guide/00_Inference101.ipynb
@ -77,7 +77,6 @@
   "outputs": [],
   "source": [
    "from llama_stack_client import LlamaStackClient\n",
-    "from llama_stack_client.types import SystemMessage, UserMessage\n",
    "\n",
    "client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')"
   ]
@ -102,18 +101,18 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "A gentle llama roams the land,\n",
-      "With soft fur and a gentle hand.\n"
+      "With soft fur and gentle eyes,\n",
+      "The llama roams, a peaceful surprise.\n"
     ]
    }
   ],
   "source": [
    "response = client.inference.chat_completion(\n",
    "    messages=[\n",
-    "        SystemMessage(content='You are a friendly assistant.', role='system'),\n",
-    "        UserMessage(content='Write a two-sentence poem about llama.', role='user')\n",
+    "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
+    "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
    "    ],\n",
-    "    model='Llama3.1-8B-Instruct',\n",
+    "    model='Llama3.2-11B-Vision-Instruct',\n",
    ")\n",
    "\n",
    "print(response.completion_message.content)"
@ -128,11 +127,6 @@
    "\n",
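    "Effective prompt creation (often called 'prompt engineering') is essential for quality responses. The sample prompt below follows two best practices for getting the most out of the Llama Stack model: a system message sets the model's tone, format, or behavior, and a clear, direct user message states the task or question.\n",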
    "\n",
-    "1. **System Messages**: Use `SystemMessage` to set the model's behavior. This is similar to providing top-level instructions for tone, format, or specific behavior.\n",
-    "   - **Example**: `SystemMessage(content='You are a friendly assistant that explains complex topics simply.')`\n",
-    "2. **User Messages**: Define the task or question you want to ask the model with a `UserMessage`. The clearer and more direct you are, the better the response.\n",
-    "   - **Example**: `UserMessage(content='Explain recursion in programming in simple terms.')`\n",
-    "\n",
    "### Sample Prompt"
   ]
  },
@ -154,10 +148,10 @@
   "source": [
    "response = client.inference.chat_completion(\n",
    "    messages=[\n",
-    "        SystemMessage(content='You are shakespeare.', role='system'),\n",
-    "        UserMessage(content='Write a two-sentence poem about llama.', role='user')\n",
+    "        {\"role\": \"system\", \"content\": \"You are shakespeare.\"},\n",
+    "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
    "    ],\n",
-    "    model='Llama3.1-8B-Instruct',\n",
+    "    model='Llama3.2-11B-Vision-Instruct',\n",
    ")\n",
    "\n",
    "print(response.completion_message.content)"
@ -175,45 +169,57 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "02211625",
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
+     "name": "stdin",
     "output_type": "stream",
     "text": [
-      "User>  Write me a 3 sentence poem about alpaca\n"
+      "User>  1+1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "\u001b[36m> Response: Softly grazing, gentle soul,\n",
-      "Alpaca's fleece, a treasure whole,\n",
-      "In Andean fields, they softly roll.\u001b[0m\n"
+      "\u001b[36m> Response: 2\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stdin",
+     "output_type": "stream",
+     "text": [
+      "User>  what is llama\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "User>  exit\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
+      "\u001b[36m> Response: A llama is a domesticated mammal native to South America, specifically the Andean region. It belongs to the camelid family, which also includes camels, alpacas, guanacos, and vicuñas.\n",
+      "\n",
+      "Here are some interesting facts about llamas:\n",
+      "\n",
+      "1. **Physical Characteristics**: Llamas are large, even-toed ungulates with a distinctive appearance. They have a long neck, a small head, and a soft, woolly coat that can be various colors, including white, brown, gray, and black.\n",
+      "2. **Size**: Llamas typically grow to be between 5 and 6 feet (1.5 to 1.8 meters) tall at the shoulder and weigh between 280 and 450 pounds (127 to 204 kilograms).\n",
+      "3. **Habitat**: Llamas are native to the Andean highlands, where they live in herds and roam freely. They are well adapted to the harsh, high-altitude climate of the Andes.\n",
+      "4. **Diet**: Llamas are herbivores and feed on a variety of plants, including grasses, leaves, and shrubs. They are known for their ability to digest plant material that other animals cannot.\n",
+      "5. **Behavior**: Llamas are social animals and live in herds. They are known for their intelligence, curiosity, and strong sense of self-preservation.\n",
+      "6. **Purpose**: Llamas have been domesticated for thousands of years and have been used for a variety of purposes, including:\n",
+      "\t* **Pack animals**: Llamas are often used as pack animals, carrying goods and supplies over long distances.\n",
+      "\t* **Fiber production**: Llama wool is highly valued for its softness, warmth, and durability.\n",
+      "\t* **Meat**: Llama meat is consumed in some parts of the world, particularly in South America.\n",
+      "\t* **Companionship**: Llamas are often kept as pets or companions, due to their gentle nature and intelligence.\n",
+      "\n",
+      "Overall, llamas are fascinating animals that have been an integral part of Andean culture for thousands of years.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "import asyncio\n",
    "from llama_stack_client import LlamaStackClient\n",
-    "from llama_stack_client.types import UserMessage\n",
    "from termcolor import cprint\n",
    "\n",
    "client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
@ -225,17 +231,17 @@
    "            cprint('Ending conversation. Goodbye!', 'yellow')\n",
    "            break\n",
    "\n",
-    "        message = UserMessage(content=user_input, role='user')\n",
+    "        message = {\"role\": \"user\", \"content\": user_input}\n",
    "        response = client.inference.chat_completion(\n",
    "            messages=[message],\n",
-    "            model='Llama3.1-8B-Instruct',\n",
+    "            model='Llama3.2-11B-Vision-Instruct',\n",
    "        )\n",
    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
    "\n",
-    "# Run the chat loop in a Jupyter Notebook cell using `await`\n",
+    "# Run the chat loop in a Jupyter Notebook cell using await\n",
    "await chat_loop()\n",
    "# To run it in a python file, use this line instead\n",
-    "# asyncio.run(chat_loop())"
+    "# asyncio.run(chat_loop())\n"
   ]
  },
  {
@ -250,66 +256,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
   "id": "9496f75c",
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
+     "name": "stdin",
     "output_type": "stream",
     "text": [
-      "User>  what is 1+1\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[36m> Response: 1 + 1 = 2\u001b[0m\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "User>  what is llama + alpaca\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[36m> Response: That's a creative and imaginative question. However, since llamas and alpacas are animals, not numbers, we can't perform a mathematical operation on them.\n",
-      "\n",
-      "But if we were to interpret this as a creative or humorous question, we could say that the result of \"llama + alpaca\" is a fun and fuzzy bundle of South American camelid cuteness!\u001b[0m\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "User>  what was the first question\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[36m> Response: The first question was \"what is 1+1\"\u001b[0m\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "User>  exit\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
+      "User>  1+1\n"
     ]
    }
   ],
@ -322,22 +277,27 @@
    "            cprint('Ending conversation. Goodbye!', 'yellow')\n",
    "            break\n",
    "\n",
-    "        user_message = UserMessage(content=user_input, role='user')\n",
+    "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
    "        conversation_history.append(user_message)\n",
    "\n",
    "        response = client.inference.chat_completion(\n",
    "            messages=conversation_history,\n",
-    "            model='Llama3.1-8B-Instruct',\n",
+    "            model='Llama3.2-11B-Vision-Instruct',\n",
    "        )\n",
    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
    "\n",
-    "        assistant_message = UserMessage(content=response.completion_message.content, role='user')\n",
+    "        # NOTE(review): the model's reply is appended with role \"user\", not \"assistant\" - confirm this is intentional\n",
+    "        assistant_message = {\n",
+    "            \"role\": \"user\",\n",
+    "            \"content\": response.completion_message.content,\n",
+    "            # NOTE(review): an assistant-role message may need extra fields (e.g. stop_reason) - TODO confirm against the API\n",
+    "        }\n",
    "        conversation_history.append(assistant_message)\n",
    "\n",
    "# Use `await` in the Jupyter Notebook cell to call the function\n",
    "await chat_loop()\n",
    "# To run it in a python file, use this line instead\n",
-    "# asyncio.run(chat_loop())"
+    "# asyncio.run(chat_loop())\n"
   ]
  },
  {
@ -354,39 +314,25 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
   "id": "d119026e",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32mUser> Write me a 3 sentence poem about llama\u001b[0m\n",
-      "\u001b[36mAssistant> \u001b[0m\u001b[33mSoft\u001b[0m\u001b[33mly\u001b[0m\u001b[33m padded\u001b[0m\u001b[33m feet\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m ground\u001b[0m\u001b[33m,\n",
-      "\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m peaceful\u001b[0m\u001b[33m sound\u001b[0m\u001b[33m,\n",
-      "\u001b[0m\u001b[33mF\u001b[0m\u001b[33murry\u001b[0m\u001b[33m coat\u001b[0m\u001b[33m and\u001b[0m\u001b[33m calm\u001b[0m\u001b[33m,\u001b[0m\u001b[33m serene\u001b[0m\u001b[33m eyes\u001b[0m\u001b[33m all\u001b[0m\u001b[33m around\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
-    "import asyncio\n",
-    "from llama_stack_client import LlamaStackClient\n",
    "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
-    "from llama_stack_client.types import UserMessage\n",
-    "from termcolor import cprint\n",
    "\n",
    "async def run_main(stream: bool = True):\n",
    "    client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
    "\n",
-    "    message = UserMessage(\n",
-    "        content='Write me a 3 sentence poem about llama', role='user'\n",
-    "    )\n",
-    "    cprint(f'User> {message.content}', 'green')\n",
+    "    message = {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": 'Write me a 3 sentence poem about llama'\n",
+    "    }\n",
+    "    cprint(f'User> {message[\"content\"]}', 'green')\n",
    "\n",
    "    response = client.inference.chat_completion(\n",
    "        messages=[message],\n",
-    "        model='Llama3.1-8B-Instruct',\n",
+    "        model='Llama3.2-11B-Vision-Instruct',\n",
    "        stream=stream,\n",
    "    )\n",
    "\n",
@ -399,7 +345,7 @@
    "# In a Jupyter Notebook cell, use `await` to call the function\n",
    "await run_main()\n",
    "# To run it in a python file, use this line instead\n",
-    "# asyncio.run(chat_loop())"
+    "# asyncio.run(run_main())\n"
   ]
  }
 ],