{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "923343b0-d4bd-4361-b8d4-dd29f86a0fbd",
   "metadata": {},
   "source": [
    "## Getting Started with LlamaStack Vision API\n",
    "\n",
    "Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
    "\n",
    "Let's import the necessary packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "eae04594-49f9-43af-bb42-9df114d9ddd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import asyncio\n",
    "import base64\n",
    "import mimetypes\n",
    "from llama_stack_client import LlamaStackClient\n",
    "from termcolor import cprint"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "143837c6-1072-4015-8297-514712704087",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "Set up your connection parameters:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d293479-9dde-4b68-94ab-d0c4c61ab08c",
   "metadata": {},
   "outputs": [],
   "source": [
    "HOST = \"localhost\"  # Replace with your Llama Stack server host\n",
    "PORT = 8321  # Replace with your Llama Stack server port\n",
    "MODEL_NAME = \"meta-llama/Llama3.2-11B-Vision-Instruct\""
   ]
  },
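  {
   "cell_type": "markdown",
   "id": "config-connectivity-note",
   "metadata": {},
   "source": [
    "Optionally, you can confirm the server is reachable and see which models it serves before continuing. This is an added sanity check (not part of the original walkthrough); it assumes the Llama Stack server is already running at `HOST:PORT`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-connectivity-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check: list the models registered with the server to\n",
    "# confirm connectivity and that MODEL_NAME is available.\n",
    "client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
    "for model in client.models.list():\n",
    "    print(model.identifier)"
   ]
  },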
  {
   "cell_type": "markdown",
   "id": "51984856-dfc7-4226-817a-1d44853e6661",
   "metadata": {},
   "source": [
    "## Helper Functions\n",
    "Let's create some utility functions to handle image processing and API interaction:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e65aae0-3ef0-4084-8c59-273a89ac9510",
   "metadata": {},
   "outputs": [],
   "source": [
    "def encode_image_to_data_url(file_path: str) -> str:\n",
    "    \"\"\"\n",
    "    Encode an image file to a data URL.\n",
    "\n",
    "    Args:\n",
    "        file_path (str): Path to the image file\n",
    "\n",
    "    Returns:\n",
    "        str: Data URL string\n",
    "    \"\"\"\n",
    "    mime_type, _ = mimetypes.guess_type(file_path)\n",
    "    if mime_type is None:\n",
    "        raise ValueError(\"Could not determine MIME type of the file\")\n",
    "\n",
    "    with open(file_path, \"rb\") as image_file:\n",
    "        encoded_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
    "\n",
    "    return f\"data:{mime_type};base64,{encoded_string}\"\n",
    "\n",
    "\n",
    "async def process_image(client, image_path: str, stream: bool = True):\n",
    "    \"\"\"\n",
    "    Process an image through the LlamaStack Vision API.\n",
    "\n",
    "    Args:\n",
    "        client (LlamaStackClient): Initialized client\n",
    "        image_path (str): Path to image file\n",
    "        stream (bool): Whether to stream the response\n",
    "    \"\"\"\n",
    "    data_url = encode_image_to_data_url(image_path)\n",
    "\n",
    "    # A multimodal user message: one image content item plus one text prompt\n",
    "    message = {\n",
    "        \"role\": \"user\",\n",
    "        \"content\": [\n",
    "            {\"type\": \"image\", \"image\": {\"url\": {\"uri\": data_url}}},\n",
    "            {\"type\": \"text\", \"text\": \"Describe what is in this image.\"}\n",
    "        ]\n",
    "    }\n",
    "\n",
    "    cprint(\"User> Sending image for analysis...\", \"green\")\n",
    "    response = client.inference.chat_completion(\n",
    "        messages=[message],\n",
    "        model_id=MODEL_NAME,\n",
    "        stream=stream,\n",
    "    )\n",
    "\n",
    "    cprint(\"Assistant> \", color=\"cyan\", end=\"\")\n",
    "    if not stream:\n",
    "        cprint(response.completion_message.content, color=\"yellow\")\n",
    "    else:\n",
    "        for chunk in response:\n",
    "            cprint(chunk.event.delta.text, color=\"yellow\", end=\"\")\n",
    "        cprint(\"\")\n"
   ]
  },
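  {
   "cell_type": "markdown",
   "id": "helper-sanity-note",
   "metadata": {},
   "source": [
    "As a quick optional check of the helper, you can encode a local image and inspect the start of the resulting data URL. The path below is just the sample image used later in this notebook; any local image will do."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helper-sanity-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: preview the data URL produced by the helper above.\n",
    "# The output should start with something like \"data:image/png;base64,...\"\n",
    "sample_data_url = encode_image_to_data_url(\"../_static/llama-stack-logo.png\")\n",
    "print(sample_data_url[:48] + \"...\")"
   ]
  },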
  {
   "cell_type": "markdown",
   "id": "8073b673-e730-4557-8980-fd8b7ea11975",
   "metadata": {},
   "source": [
    "## Chat with Image\n",
    "\n",
    "Now let's put it all together:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64d36476-95d7-49f9-a548-312cf8d8c49e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the client and process an image\n",
    "async def main():\n",
    "    client = LlamaStackClient(\n",
    "        base_url=f\"http://{HOST}:{PORT}\",\n",
    "    )\n",
    "\n",
    "    await process_image(client, \"../_static/llama-stack-logo.png\")\n",
    "\n",
    "\n",
    "# Execute the main function (notebooks support top-level await)\n",
    "await main()"
   ]
  },
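  {
   "cell_type": "markdown",
   "id": "non-streaming-note",
   "metadata": {},
   "source": [
    "If you prefer to receive the full reply at once instead of streaming it token by token, pass `stream=False` to `process_image`. This is a minimal sketch reusing the client setup and sample image from above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "non-streaming-example",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Non-streaming variant: the complete response is printed once it arrives.\n",
    "async def main_non_streaming():\n",
    "    client = LlamaStackClient(base_url=f\"http://{HOST}:{PORT}\")\n",
    "    await process_image(client, \"../_static/llama-stack-logo.png\", stream=False)\n",
    "\n",
    "\n",
    "await main_non_streaming()"
   ]
  },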
  {
   "cell_type": "markdown",
   "id": "9b39efb4",
   "metadata": {},
   "source": [
    "Thanks for checking out this notebook!\n",
    "\n",
    "The next one in the series will teach you one of the favorite applications of Large Language Models: [Tool Calling](./04_Tool_Calling101.ipynb). Enjoy!"
   ]
  }
 ],
 "metadata": {
  "fileHeader": "",
  "fileUid": "37bbbfda-8e42-446c-89c7-59dd49e2d339",
  "isAdHoc": false,
  "kernelspec": {
   "display_name": "llama-stack",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}