diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 96e39eb82..abe537c8e 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -3396,6 +3396,231 @@ "response = client.scoring.score(input_rows=rows, scoring_functions=scoring_params)\n", "pprint(response)\n" ] + }, + { + "cell_type": "markdown", + "id": "ad077440", + "metadata": {}, + "source": [ + "## 4. Image Understanding with Llama 3.2\n", + "\n", + "Below is a complete example of using Together's Llama Stack 0.1 server at https://llama-stack.together.ai to ask Llama 3.2 questions about an image." + ] + }, + { + "cell_type": "markdown", + "id": "82e381ec", + "metadata": {}, + "source": [ + "### 4.1 Setup and helpers\n", + "\n", + "Below we install the Llama Stack client 0.1, download the example image, define two image helpers, and set Llama Stack Together server URL and Llama 3.2 model name.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "865fc5a8", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install llama-stack-client==0.1.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44e05e16", + "metadata": {}, + "outputs": [], + "source": [ + "!wget https://raw.githubusercontent.com/meta-llama/llama-models/refs/heads/main/Llama_Repo.jpeg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "469750f7", + "metadata": {}, + "outputs": [], + "source": [ + "from PIL import Image\n", + "import matplotlib.pyplot as plt\n", + "\n", + "def display_image(path):\n", + " img = Image.open(path)\n", + " plt.imshow(img)\n", + " plt.axis('off')\n", + " plt.show()\n", + "\n", + "display_image(\"Llama_Repo.jpeg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2c1e1c2", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "\n", + "def encode_image(image_path):\n", + " with open(image_path, \"rb\") as image_file:\n", + " base64_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n", + " base64_url = f\"data:image/png;base64,{base64_string}\"\n", + " return base64_url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c565f99e", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "\n", + "LLAMA_STACK_API_TOGETHER_URL=\"https://llama-stack.together.ai\"\n", + "LLAMA32_11B_INSTRUCT = \"meta-llama/Llama-3.2-11B-Vision-Instruct\"" + ] + }, + { + "cell_type": "markdown", + "id": "7737cd41", + "metadata": {}, + "source": [ + "### 4.2 Using Llama Stack Chat API\n", + "\n", + "The code below uses the Llama Stack 0.1's chat API to interact with Llama 3.2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7914894", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "\n", + "async def run_main(image_path: str, prompt):\n", + " client = LlamaStackClient(\n", + " base_url=LLAMA_STACK_API_TOGETHER_URL,\n", + " )\n", + "\n", + " message = {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image\",\n", + " \"image\": {\n", + " \"url\": {\n", + " \"uri\": encode_image(image_path)\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": prompt,\n", + " }\n", + " ]\n", + " }\n", + "\n", + " response = client.inference.chat_completion(\n", + " messages=[message],\n", + " model_id=LLAMA32_11B_INSTRUCT,\n", + " stream=False,\n", + " )\n", + "\n", + " print(response.completion_message.content.lower().strip())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ee09b97", + "metadata": {}, + "outputs": [], + "source": [ + "await run_main(\"Llama_Repo.jpeg\",\n", + " \"How many different colors are those llamas?\\\n", + " What are those colors?\")" + ] + }, + { + "cell_type": "markdown", + "id": "e741d7b9", + "metadata": {}, + "source": [ + "### 4.3 Using Llama Stack Agent API\n", + "\n", + "The code below uses the Llama Stack 0.1's Agent API to interact with Llama 3.2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9a83275", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "async def run_main(image_path, prompt):\n", + " base64_image = encode_image(image_path)\n", + "\n", + " client = LlamaStackClient(\n", + " base_url=LLAMA_STACK_API_TOGETHER_URL,\n", + " )\n", + "\n", + " agent_config = AgentConfig(\n", + " model=LLAMA32_11B_INSTRUCT,\n", + " instructions=\"You are a helpful assistant\",\n", + " enable_session_persistence=False,\n", + " )\n", + "\n", + " agent = Agent(client, agent_config)\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " response = agent.create_turn(\n", + " messages=[{\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"type\": \"image\",\n", + " \"image\": {\n", + " \"url\": {\n", + " \"uri\": encode_image(image_path)\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"type\": \"text\",\n", + " \"text\": prompt,\n", + " }\n", + " ]\n", + " }],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15d0098b", + "metadata": {}, + "outputs": [], + "source": [ + "await run_main(\"Llama_Repo.jpeg\",\n", + " \"How many different colors are those llamas?\\\n", + " What are those colors?\")" + ] } ], "metadata": {