Getting started notebook update (#936)

# What does this PR do? Added examples (Section 4) of using Llama Stack 0.1 distro on together and Llama 3.2 to answer questions about an image with LS Chat and Agent APIs.
2025-02-07 15:36:15 -08:00 · 2025-02-07 15:36:15 -08:00 · a229de6d1e
commit a229de6d1e
parent ddd06105a4
1 changed files with 225 additions and 0 deletions
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@ -3396,6 +3396,231 @@
        "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n",
        "pprint(response)\n"
      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "ad077440",
+      "metadata": {},
+      "source": [
+        "## 4. Image Understanding with Llama 3.2\n",
+        "\n",
+        "Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "82e381ec",
+      "metadata": {},
+      "source": [
+        "### 4.1 Setup and helpers\n",
+        "\n",
+        "Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set the Llama Stack Together server URL and the Llama 3.2 model name.\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "865fc5a8",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "%pip install llama-stack-client==0.1.0"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "44e05e16",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "!wget -nc https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "469750f7",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from PIL import Image\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "def display_image(path):\n",
+        "    \"\"\"Show the image stored at ``path`` inline with the axes hidden.\"\"\"\n",
+        "    # The context manager closes the underlying file once the image is drawn.\n",
+        "    with Image.open(path) as img:\n",
+        "        plt.imshow(img)\n",
+        "    plt.axis('off')\n",
+        "    plt.show()\n",
+        "\n",
+        "display_image(\"Llama_Repo.jpeg\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a2c1e1c2",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import base64\n",
+        "\n",
+        "def encode_image(image_path):\n",
+        "    \"\"\"Return the file at ``image_path`` as a base64 ``data:`` URL.\n",
+        "\n",
+        "    The MIME type is guessed from the file extension, so a ``.jpeg`` file is\n",
+        "    labelled ``image/jpeg`` rather than ``image/png``. When the type cannot\n",
+        "    be guessed, ``image/png`` is used.\n",
+        "    \"\"\"\n",
+        "    import mimetypes\n",
+        "\n",
+        "    mime_type, _ = mimetypes.guess_type(image_path)\n",
+        "    if mime_type is None:\n",
+        "        mime_type = \"image/png\"\n",
+        "    with open(image_path, \"rb\") as image_file:\n",
+        "        base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
+        "    return f\"data:{mime_type};base64,{base64_string}\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "c565f99e",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from llama_stack_client import LlamaStackClient\n",
+        "\n",
+        "# Base URL of the Llama Stack server that the clients below connect to.\n",
+        "LLAMA_STACK_API_TOGETHER_URL = \"https://llama-stack.together.ai\"\n",
+        "# Model identifier passed to the chat and agent requests below.\n",
+        "LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "7737cd41",
+      "metadata": {},
+      "source": [
+        "### 4.2 Using Llama Stack Chat API\n",
+        "\n",
+        "The code below uses Llama Stack 0.1's Chat API to interact with Llama 3.2:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "d7914894",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
+        "\n",
+        "async def run_main(image_path: str, prompt):\n",
+        "    \"\"\"Ask the model a question about a local image via the chat API.\n",
+        "\n",
+        "    Sends one user message holding the image at ``image_path`` (encoded by\n",
+        "    ``encode_image``) followed by ``prompt`` as a non-streaming chat\n",
+        "    completion, then prints the reply lower-cased and stripped.\n",
+        "    \"\"\"\n",
+        "    client = LlamaStackClient(base_url=LLAMA_STACK_API_TOGETHER_URL)\n",
+        "\n",
+        "    # The image part comes first, followed by the text question.\n",
+        "    image_part = {\n",
+        "        \"type\": \"image\",\n",
+        "        \"image\": {\"url\": {\"uri\": encode_image(image_path)}},\n",
+        "    }\n",
+        "    text_part = {\"type\": \"text\", \"text\": prompt}\n",
+        "    user_message = {\"role\": \"user\", \"content\": [image_part, text_part]}\n",
+        "\n",
+        "    response = client.inference.chat_completion(\n",
+        "        messages=[user_message],\n",
+        "        model_id=LLAMA32_11B_INSTRUCT,\n",
+        "        stream=False,\n",
+        "    )\n",
+        "\n",
+        "    reply = response.completion_message.content\n",
+        "    print(reply.lower().strip())"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "4ee09b97",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "await run_main(\n",
+        "    \"Llama_Repo.jpeg\",\n",
+        "    \"How many different colors are those llamas? \"\n",
+        "    \"What are those colors?\",\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "e741d7b9",
+      "metadata": {},
+      "source": [
+        "### 4.3 Using Llama Stack Agent API\n",
+        "\n",
+        "The code below uses Llama Stack 0.1's Agent API to interact with Llama 3.2:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "f9a83275",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from llama_stack_client.lib.agents.agent import Agent\n",
+        "from llama_stack_client.lib.agents.event_logger import EventLogger\n",
+        "from llama_stack_client.types.agent_create_params import AgentConfig\n",
+        "\n",
+        "async def run_main(image_path, prompt):\n",
+        "    \"\"\"Ask the model a question about a local image via the Agent API.\n",
+        "\n",
+        "    Creates an agent and a session, sends one user turn holding the image\n",
+        "    at ``image_path`` (encoded by ``encode_image``) followed by ``prompt``,\n",
+        "    and prints every event that ``EventLogger`` yields for the response.\n",
+        "    \"\"\"\n",
+        "    # Encode once and reuse the result in the turn below.\n",
+        "    base64_image = encode_image(image_path)\n",
+        "\n",
+        "    client = LlamaStackClient(\n",
+        "        base_url=LLAMA_STACK_API_TOGETHER_URL,\n",
+        "    )\n",
+        "\n",
+        "    agent_config = AgentConfig(\n",
+        "        model=LLAMA32_11B_INSTRUCT,\n",
+        "        instructions=\"You are a helpful assistant\",\n",
+        "        enable_session_persistence=False,\n",
+        "    )\n",
+        "\n",
+        "    agent = Agent(client, agent_config)\n",
+        "    session_id = agent.create_session(\"test-session\")\n",
+        "\n",
+        "    response = agent.create_turn(\n",
+        "        messages=[{\n",
+        "            \"role\": \"user\",\n",
+        "            \"content\": [\n",
+        "                {\n",
+        "                    \"type\": \"image\",\n",
+        "                    \"image\": {\n",
+        "                        \"url\": {\n",
+        "                            \"uri\": base64_image\n",
+        "                        }\n",
+        "                    }\n",
+        "                },\n",
+        "                {\n",
+        "                    \"type\": \"text\",\n",
+        "                    \"text\": prompt,\n",
+        "                }\n",
+        "            ]\n",
+        "        }],\n",
+        "        session_id=session_id,\n",
+        "    )\n",
+        "\n",
+        "    for log in EventLogger().log(response):\n",
+        "        log.print()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "15d0098b",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "await run_main(\n",
+        "    \"Llama_Repo.jpeg\",\n",
+        "    \"How many different colors are those llamas? \"\n",
+        "    \"What are those colors?\",\n",
+        ")"
+      ]
    }
  ],
  "metadata": {