From a229de6d1eaff027e7970202d71c1ee7e63ae3a2 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Fri, 7 Feb 2025 15:36:15 -0800 Subject: [PATCH] Getting started notebook update (#936) # What does this PR do? Added examples (Section 4) of using Llama Stack 0.1 distro on together and Llama 3.2 to answer questions about an image with LS Chat and Agent APIs. --- docs/getting_started.ipynb | 225 +++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 96e39eb82..abe537c8e 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -3396,6 +3396,231 @@ "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n", "pprint(response)\n" ] + }, + { + "cell_type": "markdown", + "id": "ad077440", + "metadata": {}, + "source": [ + "## 4. Image Understanding with Llama 3.2\n", + "\n", + "Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image." 
+ ] + }, + { + "cell_type": "markdown", + "id": "82e381ec", + "metadata": {}, + "source": [ + "### 4.1 Setup and helpers\n", + "\n", + "Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "865fc5a8", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install llama-stack-client==0.1.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44e05e16", + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "469750f7", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import matplotlib.pyplot as plt\n", + "\n", + "def display_image(path):\n", + " img = Image.open(path)\n", + " plt.imshow(img)\n", + " plt.axis('off')\n", + " plt.show()\n", + "\n", + "display_image(\"Llama_Repo.jpeg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2c1e1c2", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "def encode_image(image_path):\n", + " with open(image_path, \"rb\") as image_file:\n", + " base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n", + " base64_url = f\"data:image/jpeg;base64,{base64_string}\"\n", + " return base64_url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c565f99e", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "\n", + "LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n", + "LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\"" + ] + }, + { + "cell_type": "markdown", + "id": "7737cd41", + "metadata": {}, + "source": [ + "### 4.2 Using Llama Stack Chat API\n", + "\n", + "The 
code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7914894", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "\n", + "async def run_main(image_path: str, prompt):\n", + " client = LlamaStackClient(\n", + " base_url=LLAMA_STACK_API_TOGETHER_URL,\n", + " )\n", + "\n", + " message = {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image\",\n", + " \"image\": {\n", + " \"url\": {\n", + " \"uri\": encode_image(image_path)\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": prompt,\n", + " }\n", + " ]\n", + " }\n", + "\n", + " response = client.inference.chat_completion(\n", + " messages=[message],\n", + " model_id=LLAMA32_11B_INSTRUCT,\n", + " stream=False,\n", + " )\n", + "\n", + " print(response.completion_message.content.lower().strip())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ee09b97", + "metadata": {}, + "outputs": [], + "source": [ + "await run_main(\"Llama_Repo.jpeg\",\n", + " \"How many different colors are those llamas?\\\n", + " What are those colors?\")" + ] + }, + { + "cell_type": "markdown", + "id": "e741d7b9", + "metadata": {}, + "source": [ + "### 4.3 Using Llama Stack Agent API\n", + "\n", + "The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9a83275", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "async def run_main(image_path, prompt):\n", + " base64_image = encode_image(image_path)\n", + "\n", + " client = LlamaStackClient(\n", + " 
base_url=LLAMA_STACK_API_TOGETHER_URL,\n", + " )\n", + "\n", + " agent_config = AgentConfig(\n", + " model=LLAMA32_11B_INSTRUCT,\n", + " instructions=\"You are a helpful assistant\",\n", + " enable_session_persistence=False,\n", + " )\n", + "\n", + " agent = Agent(client, agent_config)\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " response = agent.create_turn(\n", + " messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image\",\n", + " \"image\": {\n", + " \"url\": {\n", + " \"uri\": base64_image\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": prompt,\n", + " }\n", + " ]\n", + " }],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15d0098b", + "metadata": {}, + "outputs": [], + "source": [ + "await run_main(\"Llama_Repo.jpeg\",\n", + " \"How many different colors are those llamas?\\\n", + " What are those colors?\")" + ] + } ], "metadata": {