## Getting Started with LlamaStack Vision API

Let's import the necessary packages:

In [1]:
import asyncio
import base64
import mimetypes
from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.inference.event_logger import EventLogger
from llama_stack_client.types import UserMessage
from termcolor import cprint

## Configuration
Set up your connection parameters:

In [2]:
# Hostname of the LlamaStack server the client connects to -- replace with your host
HOST = "localhost"
# Port combined with HOST to build the client's base URL -- replace with your port
PORT = 5001

## Helper Functions
Let's create some utility functions to handle image processing and API interaction:

In [3]:
def encode_image_to_data_url(file_path: str) -> str:
    """
    Encode an image file to a base64 data URL.

    The MIME type is guessed from the file name via ``mimetypes.guess_type``;
    the file contents are not inspected. The guess is made before the file is
    opened, so an unrecognised extension fails without touching the disk.

    Args:
        file_path (str): Path to the image file

    Returns:
        str: Data URL of the form ``data:<mime type>;base64,<payload>``

    Raises:
        ValueError: If no MIME type can be guessed from ``file_path``
        OSError: If the file cannot be opened or read
    """
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        # Name the offending path so the failure is actionable when several
        # images are being processed.
        raise ValueError(
            f"Could not determine MIME type of the file: {file_path!r}"
        )

    # Read the raw bytes and base64-encode them into an ASCII-safe payload.
    with open(file_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:{mime_type};base64,{encoded_string}"

async def process_image(
    client: LlamaStackClient,
    image_path: str,
    stream: bool = True,
    model: str = "Llama3.2-11B-Vision-Instruct",
    prompt: str = "Describe what is in this image.",
):
    """
    Process an image through the LlamaStack Vision API.

    The image is embedded in the request as a base64 data URL, followed by a
    text prompt, and sent as a single user message to the chat completion
    endpoint. The response is printed rather than returned.

    Args:
        client (LlamaStackClient): Initialized client
        image_path (str): Path to image file
        stream (bool): Whether to stream the response
        model (str): Identifier of the model to query
        prompt (str): Text instruction sent alongside the image
    """
    data_url = encode_image_to_data_url(image_path)

    # A single user turn with mixed content: the image first, then the prompt.
    message = UserMessage(
        role="user",
        content=[
            {"image": {"uri": data_url}},
            prompt,
        ],
    )

    cprint("User> Sending image for analysis...", "green")
    # NOTE(review): this call is not awaited, so with the synchronous
    # LlamaStackClient it presumably blocks the event loop while the request
    # runs -- confirm whether an async client is intended here.
    response = client.inference.chat_completion(
        messages=[message],
        model=model,
        stream=stream,
    )

    if not stream:
        cprint(f"> Response: {response}", "cyan")
    else:
        # Streaming: print each logged event as it is produced.
        # NOTE(review): relies on EventLogger.log() being async-iterable in
        # the installed llama_stack_client version -- TODO confirm.
        async for log in EventLogger().log(response):
            log.print()

## Chat with Image

Now let's put it all together:

In [None]:
# [Cell 5] - Initialize client and process image
async def main():
    # Initialize client
    client = LlamaStackClient(
        base_url=f"http://{HOST}:{PORT}",
    )
    
    # Process image
    await process_image(client, "logo.png")
    
    # Query available models
    models_response = client.models.list()
    print("\nAvailable Models:")
    print(models_response)

# Execute the main function
await main()

[32mUser> Sending image for analysis...[0m
[36mAssistant> [0m[33mThe[0m[33m image[0m[33m features[0m[33m a[0m[33m styl[0m[33mized[0m[33m,[0m[33m mon[0m[33moch[0m[33mromatic[0m[33m logo[0m[33m for[0m[33m "[0m[33mLL[0m[33mAMA[0m[33m STACK[0m[33m"[0m[33m against[0m[33m a[0m[33m solid[0m[33m black[0m[33m background[0m[33m.[0m[33m The[0m[33m logo[0m[33m is[0m[33m centered[0m[33m and[0m[33m consists[0m[33m of[0m[33m a[0m[33m simple[0m[33m line[0m[33m drawing[0m[33m of[0m[33m a[0m[33m llama[0m[33m's[0m[33m head[0m[33m and[0m[33m neck[0m[33m,[0m[33m with[0m[33m its[0m[33m body[0m[33m replaced[0m[33m by[0m[33m a[0m[33m stack[0m[33m of[0m[33m three[0m[33m rounded[0m[33m rectangles[0m[33m resembling[0m[33m a[0m[33m pile[0m[33m of[0m[33m pancakes[0m[33m or[0m[33m a[0m[33m stack[0m[33m of[0m[33m books[0m[33m.[0m[33m The[0m[33m llama[0m[33m's[0m[33m head[0m[33m

In [None]:
#fin