{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "923343b0-d4bd-4361-b8d4-dd29f86a0fbd",
   "metadata": {},
   "source": [
    "## Getting Started with LlamaStack Vision API\n",
    "\n",
    "Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
    "\n",
    "Let's import the necessary packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "eae04594-49f9-43af-bb42-9df114d9ddd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import base64\n",
    "import mimetypes\n",
    "from llama_stack_client import LlamaStackClient\n",
    "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
    "from llama_stack_client.types import UserMessage\n",
    "from termcolor import cprint"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "143837c6-1072-4015-8297-514712704087",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "Set up your connection parameters:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1d293479-9dde-4b68-94ab-d0c4c61ab08c",
   "metadata": {},
   "outputs": [],
   "source": [
    "HOST = \"localhost\"  # Replace with your host\n",
    "PORT = 5000  # Replace with your port"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "51984856-dfc7-4226-817a-1d44853e6661",
   "metadata": {},
   "source": [
    "## Helper Functions\n",
    "Let's create some utility functions to handle image processing and API interaction:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8e65aae0-3ef0-4084-8c59-273a89ac9510",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_image_to_data_url(file_path: str) -> str:\n",
    "    \"\"\"\n",
    "    Encode an image file to a data URL.\n",
    "\n",
    "    Args:\n",
    "        file_path (str): Path to the image file\n",
    "\n",
    "    Returns:\n",
    "        str: Data URL string\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the MIME type cannot be determined from the file name\n",
    "    \"\"\"\n",
    "    mime_type, _ = mimetypes.guess_type(file_path)\n",
    "    if mime_type is None:\n",
    "        raise ValueError(\"Could not determine MIME type of the file\")\n",
    "\n",
    "    with open(file_path, \"rb\") as image_file:\n",
    "        encoded_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
    "\n",
    "    return f\"data:{mime_type};base64,{encoded_string}\"\n",
    "\n",
    "async def process_image(client: LlamaStackClient, image_path: str, stream: bool = True):\n",
    "    \"\"\"\n",
    "    Process an image through the LlamaStack Vision API.\n",
    "\n",
    "    Args:\n",
    "        client (LlamaStackClient): Initialized client\n",
    "        image_path (str): Path to image file\n",
    "        stream (bool): Whether to stream the response\n",
    "    \"\"\"\n",
    "    data_url = encode_image_to_data_url(image_path)\n",
    "\n",
    "    message = UserMessage(\n",
    "        role=\"user\",\n",
    "        content=[\n",
    "            {\"image\": {\"uri\": data_url}},\n",
    "            \"Describe what is in this image.\",\n",
    "        ],\n",
    "    )\n",
    "\n",
    "    cprint(\"User> Sending image for analysis...\", \"green\")\n",
    "    response = client.inference.chat_completion(\n",
    "        messages=[message],\n",
    "        model=\"Llama3.2-11B-Vision-Instruct\",\n",
    "        stream=stream,\n",
    "    )\n",
    "\n",
    "    if not stream:\n",
    "        cprint(f\"> Response: {response}\", \"cyan\")\n",
    "    else:\n",
    "        async for log in EventLogger().log(response):\n",
    "            log.print()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8073b673-e730-4557-8980-fd8b7ea11975",
   "metadata": {},
   "source": [
    "## Chat with Image\n",
    "\n",
    "Now let's put it all together. Make sure an image file named `logo.png` exists in the notebook's working directory (or update the path passed to `process_image` below):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64d36476-95d7-49f9-a548-312cf8d8c49e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize client and process image\n",
    "async def main():\n",
    "    # Initialize client\n",
    "    client = LlamaStackClient(\n",
    "        base_url=f\"http://{HOST}:{PORT}\",\n",
    "    )\n",
    "\n",
    "    # Process image (requires logo.png in the working directory)\n",
    "    await process_image(client, \"logo.png\")\n",
    "\n",
    "    # Query available models\n",
    "    models_response = client.models.list()\n",
    "    print(\"\\nAvailable Models:\")\n",
    "    print(models_response)\n",
    "\n",
    "# Execute the main function\n",
    "await main()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b39efb4",
   "metadata": {},
   "source": [
    "Thanks for checking out this notebook! \n",
    "\n",
    "The next one in the series will teach you one of the favorite applications of Large Language Models: [Tool Calling](./03_Tool_Calling101.ipynb). Enjoy!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}