forked from phoenix-oss/llama-stack-mirror
		
	# What does this PR do? Adding the nbformat version fixes this issue. Not sure exactly why this is needed, but the version was written to the bottom of a notebook file when I renamed it while trying to get to the bottom of this. When I opened it on GitHub, the issue was no longer present. Closes #1837 ## Test Plan N/A
		
			
				
	
	
		
			206 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			206 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
| {
 | |
|   "cells": [
 | |
|     {
 | |
|       "cell_type": "markdown",
 | |
|       "id": "923343b0-d4bd-4361-b8d4-dd29f86a0fbd",
 | |
|       "metadata": {},
 | |
|       "source": [
 | |
|         "## Getting Started with LlamaStack Vision API\n",
 | |
|         "\n",
 | |
|         "Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
 | |
|         "\n",
 | |
|         "Let's import the necessary packages:"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "code",
 | |
|       "execution_count": 1,
 | |
|       "id": "eae04594-49f9-43af-bb42-9df114d9ddd6",
 | |
|       "metadata": {},
 | |
|       "outputs": [],
 | |
|       "source": [
 | |
|         "import asyncio\n",
 | |
|         "import base64\n",
 | |
|         "import mimetypes\n",
 | |
|         "from llama_stack_client import LlamaStackClient\n",
 | |
|         "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
 | |
|         "from llama_stack_client.types import UserMessage\n",
 | |
|         "from termcolor import cprint"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "markdown",
 | |
|       "id": "143837c6-1072-4015-8297-514712704087",
 | |
|       "metadata": {},
 | |
|       "source": [
 | |
|         "## Configuration\n",
 | |
|         "Set up your connection parameters:"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "code",
 | |
|       "execution_count": null,
 | |
|       "id": "1d293479-9dde-4b68-94ab-d0c4c61ab08c",
 | |
|       "metadata": {},
 | |
|       "outputs": [],
 | |
|       "source": [
 | |
|         "HOST = \"localhost\"  # Replace with your host\n",
 | |
|         "CLOUD_PORT = 8321  # Replace with your cloud distro port\n",
 | |
|         "MODEL_NAME = \"Llama3.2-11B-Vision-Instruct\"  # Passed as model_id to chat_completion"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "markdown",
 | |
|       "id": "51984856-dfc7-4226-817a-1d44853e6661",
 | |
|       "metadata": {},
 | |
|       "source": [
 | |
|         "## Helper Functions\n",
 | |
|         "Let's create some utility functions to handle image processing and API interaction:"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "code",
 | |
|       "execution_count": null,
 | |
|       "id": "8e65aae0-3ef0-4084-8c59-273a89ac9510",
 | |
|       "metadata": {},
 | |
|       "outputs": [],
 | |
|       "source": [
 | |
|         "import base64\n",
 | |
|         "import mimetypes\n",
 | |
|         "from termcolor import cprint\n",
 | |
|         "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
 | |
|         "\n",
 | |
|         "def encode_image_to_data_url(file_path: str) -> str:\n",
 | |
|         "    \"\"\"\n",
 | |
|         "    Build a base64 data URL from an image file on disk.\n",
 | |
|         "\n",
 | |
|         "    Args:\n",
 | |
|         "        file_path (str): Location of the image to encode\n",
 | |
|         "\n",
 | |
|         "    Returns:\n",
 | |
|         "        str: URL of the form data:<mime type>;base64,<payload>\n",
 | |
|         "\n",
 | |
|         "    Raises:\n",
 | |
|         "        ValueError: If no MIME type can be guessed for file_path\n",
 | |
|         "    \"\"\"\n",
 | |
|         "    # guess_type returns (type, encoding); only the type is needed here.\n",
 | |
|         "    mime_type = mimetypes.guess_type(file_path)[0]\n",
 | |
|         "    if mime_type is None:\n",
 | |
|         "        raise ValueError(\"Could not determine MIME type of the file\")\n",
 | |
|         "\n",
 | |
|         "    # Read the raw bytes first, then encode once the file is closed.\n",
 | |
|         "    with open(file_path, \"rb\") as image_file:\n",
 | |
|         "        raw_bytes = image_file.read()\n",
 | |
|         "    encoded_string = base64.b64encode(raw_bytes).decode(\"utf-8\")\n",
 | |
|         "\n",
 | |
|         "    return f\"data:{mime_type};base64,{encoded_string}\"\n",
 | |
|         "\n",
 | |
|         "async def process_image(client, image_path: str, stream: bool = True):\n",
 | |
|         "    \"\"\"\n",
 | |
|         "    Send one image to the LlamaStack Vision API and print the model's description.\n",
 | |
|         "\n",
 | |
|         "    Args:\n",
 | |
|         "        client (LlamaStackClient): Initialized client\n",
 | |
|         "        image_path (str): Path to image file\n",
 | |
|         "        stream (bool): Print streamed events when True, the whole response otherwise\n",
 | |
|         "    \"\"\"\n",
 | |
|         "    # The image travels inline as a data URL, followed by the text prompt.\n",
 | |
|         "    image_part = {\"image\": {\"uri\": encode_image_to_data_url(image_path)}}\n",
 | |
|         "    user_turn = {\n",
 | |
|         "        \"role\": \"user\",\n",
 | |
|         "        \"content\": [image_part, \"Describe what is in this image.\"],\n",
 | |
|         "    }\n",
 | |
|         "\n",
 | |
|         "    cprint(\"User> Sending image for analysis...\", \"green\")\n",
 | |
|         "    response = client.inference.chat_completion(\n",
 | |
|         "        model_id=MODEL_NAME,\n",
 | |
|         "        messages=[user_turn],\n",
 | |
|         "        stream=stream,\n",
 | |
|         "    )\n",
 | |
|         "\n",
 | |
|         "    # Non-streaming: the full response object is printed in one go.\n",
 | |
|         "    if not stream:\n",
 | |
|         "        cprint(f\"> Response: {response}\", \"cyan\")\n",
 | |
|         "        return\n",
 | |
|         "\n",
 | |
|         "    # NOTE(review): async for needs EventLogger().log(response) to be an async iterator; confirm for your client version.\n",
 | |
|         "    async for event in EventLogger().log(response):\n",
 | |
|         "        event.print()\n"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "markdown",
 | |
|       "id": "8073b673-e730-4557-8980-fd8b7ea11975",
 | |
|       "metadata": {},
 | |
|       "source": [
 | |
|         "## Chat with Image\n",
 | |
|         "\n",
 | |
|         "Now let's put it all together:"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "code",
 | |
|       "execution_count": 6,
 | |
|       "id": "64d36476-95d7-49f9-a548-312cf8d8c49e",
 | |
|       "metadata": {},
 | |
|       "outputs": [
 | |
|         {
 | |
|           "name": "stdout",
 | |
|           "output_type": "stream",
 | |
|           "text": [
 | |
|             "\u001b[32mUser> Sending image for analysis...\u001b[0m\n",
 | |
|             "\u001b[36mAssistant> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m image\u001b[0m\u001b[33m features\u001b[0m\u001b[33m a\u001b[0m\u001b[33m simple\u001b[0m\u001b[33m,\u001b[0m\u001b[33m mon\u001b[0m\u001b[33moch\u001b[0m\u001b[33mromatic\u001b[0m\u001b[33m line\u001b[0m\u001b[33m drawing\u001b[0m\u001b[33m of\u001b[0m\u001b[33m a\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m the\u001b[0m\u001b[33m words\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mLL\u001b[0m\u001b[33mAMA\u001b[0m\u001b[33m STACK\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m written\u001b[0m\u001b[33m above\u001b[0m\u001b[33m it\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m is\u001b[0m\u001b[33m depicted\u001b[0m\u001b[33m in\u001b[0m\u001b[33m a\u001b[0m\u001b[33m cartoon\u001b[0m\u001b[33mish\u001b[0m\u001b[33m style\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m a\u001b[0m\u001b[33m large\u001b[0m\u001b[33m body\u001b[0m\u001b[33m and\u001b[0m\u001b[33m a\u001b[0m\u001b[33m long\u001b[0m\u001b[33m neck\u001b[0m\u001b[33m.\u001b[0m\u001b[33m It\u001b[0m\u001b[33m has\u001b[0m\u001b[33m a\u001b[0m\u001b[33m distinctive\u001b[0m\u001b[33m head\u001b[0m\u001b[33m shape\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m a\u001b[0m\u001b[33m small\u001b[0m\u001b[33m circle\u001b[0m\u001b[33m for\u001b[0m\u001b[33m the\u001b[0m\u001b[33m eye\u001b[0m\u001b[33m and\u001b[0m\u001b[33m a\u001b[0m\u001b[33m curved\u001b[0m\u001b[33m line\u001b[0m\u001b[33m for\u001b[0m\u001b[33m the\u001b[0m\u001b[33m mouth\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m body\u001b[0m\u001b[33m is\u001b[0m\u001b[33m composed\u001b[0m\u001b[33m of\u001b[0m\u001b[33m several\u001b[0m\u001b[33m rounded\u001b[0m\u001b[33m shapes\u001b[0m\u001b[33m,\u001b[0m\u001b[33m giving\u001b[0m\u001b[33m it\u001b[0m\u001b[33m a\u001b[0m\u001b[33m soft\u001b[0m\u001b[33m 
and\u001b[0m\u001b[33m cudd\u001b[0m\u001b[33mly\u001b[0m\u001b[33m appearance\u001b[0m\u001b[33m.\n",
 | |
|             "\n",
 | |
|             "\u001b[0m\u001b[33mThe\u001b[0m\u001b[33m words\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mLL\u001b[0m\u001b[33mAMA\u001b[0m\u001b[33m STACK\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m are\u001b[0m\u001b[33m written\u001b[0m\u001b[33m in\u001b[0m\u001b[33m a\u001b[0m\u001b[33m playful\u001b[0m\u001b[33m,\u001b[0m\u001b[33m handwritten\u001b[0m\u001b[33m font\u001b[0m\u001b[33m above\u001b[0m\u001b[33m the\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m head\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m text\u001b[0m\u001b[33m is\u001b[0m\u001b[33m also\u001b[0m\u001b[33m in\u001b[0m\u001b[33m a\u001b[0m\u001b[33m mon\u001b[0m\u001b[33moch\u001b[0m\u001b[33mromatic\u001b[0m\u001b[33m color\u001b[0m\u001b[33m scheme\u001b[0m\u001b[33m,\u001b[0m\u001b[33m matching\u001b[0m\u001b[33m the\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m outline\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m background\u001b[0m\u001b[33m of\u001b[0m\u001b[33m the\u001b[0m\u001b[33m image\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m solid\u001b[0m\u001b[33m black\u001b[0m\u001b[33m color\u001b[0m\u001b[33m,\u001b[0m\u001b[33m which\u001b[0m\u001b[33m provides\u001b[0m\u001b[33m a\u001b[0m\u001b[33m clean\u001b[0m\u001b[33m and\u001b[0m\u001b[33m simple\u001b[0m\u001b[33m contrast\u001b[0m\u001b[33m to\u001b[0m\u001b[33m the\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m design\u001b[0m\u001b[33m.\n",
 | |
|             "\n",
 | |
|             "\u001b[0m\u001b[33mOverall\u001b[0m\u001b[33m,\u001b[0m\u001b[33m the\u001b[0m\u001b[33m image\u001b[0m\u001b[33m appears\u001b[0m\u001b[33m to\u001b[0m\u001b[33m be\u001b[0m\u001b[33m a\u001b[0m\u001b[33m logo\u001b[0m\u001b[33m or\u001b[0m\u001b[33m icon\u001b[0m\u001b[33m for\u001b[0m\u001b[33m a\u001b[0m\u001b[33m brand\u001b[0m\u001b[33m or\u001b[0m\u001b[33m product\u001b[0m\u001b[33m called\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mL\u001b[0m\u001b[33mlama\u001b[0m\u001b[33m Stack\u001b[0m\u001b[33m.\"\u001b[0m\u001b[33m The\u001b[0m\u001b[33m use\u001b[0m\u001b[33m of\u001b[0m\u001b[33m a\u001b[0m\u001b[33m cartoon\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m and\u001b[0m\u001b[33m a\u001b[0m\u001b[33m playful\u001b[0m\u001b[33m font\u001b[0m\u001b[33m suggests\u001b[0m\u001b[33m a\u001b[0m\u001b[33m l\u001b[0m\u001b[33migh\u001b[0m\u001b[33mthe\u001b[0m\u001b[33mart\u001b[0m\u001b[33med\u001b[0m\u001b[33m and\u001b[0m\u001b[33m humorous\u001b[0m\u001b[33m tone\u001b[0m\u001b[33m,\u001b[0m\u001b[33m while\u001b[0m\u001b[33m the\u001b[0m\u001b[33m mon\u001b[0m\u001b[33moch\u001b[0m\u001b[33mromatic\u001b[0m\u001b[33m color\u001b[0m\u001b[33m scheme\u001b[0m\u001b[33m gives\u001b[0m\u001b[33m the\u001b[0m\u001b[33m image\u001b[0m\u001b[33m a\u001b[0m\u001b[33m clean\u001b[0m\u001b[33m and\u001b[0m\u001b[33m modern\u001b[0m\u001b[33m feel\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n"
 | |
|           ]
 | |
|         }
 | |
|       ],
 | |
|       "source": [
 | |
|         "# [Cell 5] - Initialize client and process image\n",
 | |
|         "async def main():\n",
 | |
|         "    \"\"\"Create a client for the configured server and describe the Llama Stack logo image.\"\"\"\n",
 | |
|         "    # Initialize client\n",
 | |
|         "    # The configuration cell defines CLOUD_PORT (there is no PORT variable),\n",
 | |
|         "    # so use it here; otherwise a fresh kernel raises NameError.\n",
 | |
|         "    client = LlamaStackClient(\n",
 | |
|         "        base_url=f\"http://{HOST}:{CLOUD_PORT}\",\n",
 | |
|         "    )\n",
 | |
|         "\n",
 | |
|         "    # Process image\n",
 | |
|         "    await process_image(client, \"../_static/llama-stack-logo.png\")\n",
 | |
|         "\n",
 | |
|         "\n",
 | |
|         "\n",
 | |
|         "# Execute the main function\n",
 | |
|         "await main()"
 | |
|       ]
 | |
|     },
 | |
|     {
 | |
|       "cell_type": "markdown",
 | |
|       "id": "9b39efb4",
 | |
|       "metadata": {},
 | |
|       "source": [
 | |
|         "Thanks for checking out this notebook! \n",
 | |
|         "\n",
 | |
|         "The next one in the series will teach you one of the favorite applications of Large Language Models: [Tool Calling](./04_Tool_Calling101.ipynb). Enjoy!"
 | |
|       ]
 | |
|     }
 | |
|   ],
 | |
|   "metadata": {
 | |
|     "fileHeader": "",
 | |
|     "fileUid": "37bbbfda-8e42-446c-89c7-59dd49e2d339",
 | |
|     "isAdHoc": false,
 | |
|     "kernelspec": {
 | |
|       "display_name": "base",
 | |
|       "language": "python",
 | |
|       "name": "python3"
 | |
|     },
 | |
|     "language_info": {
 | |
|       "codemirror_mode": {
 | |
|         "name": "ipython",
 | |
|         "version": 3
 | |
|       },
 | |
|       "file_extension": ".py",
 | |
|       "mimetype": "text/x-python",
 | |
|       "name": "python",
 | |
|       "nbconvert_exporter": "python",
 | |
|       "pygments_lexer": "ipython3",
 | |
|       "version": "3.12.2"
 | |
|     }
 | |
|   },
 | |
|   "nbformat": 4,
 | |
|   "nbformat_minor": 5
 | |
| }
 |