Let's explore how to have a conversation about images using the Llama Stack Inference API! This section will show you how to:
1. Load and prepare images for the API
2. Send image-based queries
3. Create an interactive chat loop with images


In [None]:
import asyncio
import base64
import mimetypes
from pathlib import Path
from typing import Optional, Union

from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage
from llama_stack_client.lib.inference.event_logger import EventLogger
from termcolor import cprint

# Helper function to convert image to data URL
def image_to_data_url(file_path: Union[str, Path]) -> str:
    """Encode an image file as a base64 ``data:`` URL.

    Args:
        file_path: Location of the image on disk (string or ``Path``).

    Returns:
        str: ``data:<mime type>;base64,<encoded bytes>``.

    Raises:
        FileNotFoundError: If nothing exists at ``file_path``.
        ValueError: If the MIME type cannot be guessed from the file name.
    """
    image = Path(file_path)
    if not image.exists():
        raise FileNotFoundError(f"Image not found: {image}")

    # The MIME type is inferred from the file name, not from the contents.
    mime_type = mimetypes.guess_type(str(image))[0]
    if mime_type is None:
        raise ValueError("Could not determine MIME type of the image")

    payload = base64.b64encode(image.read_bytes()).decode("utf-8")
    return "data:" + mime_type + ";base64," + payload

## 2. Create an Interactive Image Chat

Let's create a function that enables back-and-forth conversation about an image:

In [None]:
from IPython.display import Image, display
import ipywidgets as widgets

# Show the image the conversation will be about.
image_path = "your_image.jpg"  # Replace with your image path
display(Image(filename=image_path))

# Connect to the Llama Stack server.
client = LlamaStackClient(
    base_url="http://localhost:8000",  # Adjust host/port as needed
)

# Build the chat widgets: a single-line text box for questions and an output
# area that the submit handler prints into.
output = widgets.Output()
text_input = widgets.Text(
    value='',
    placeholder='Type your question about the image...',
    description='Ask:',
    disabled=False
)

# Render the text box followed by the output area.
display(text_input, output)

# Handle chat interaction
async def on_submit(change):
    """Send the text box's current question about the image and print the streamed reply.

    Typing ``exit`` prints a farewell and returns without contacting the model.

    Args:
        change: Value supplied by the widget callback; not used.
    """
    with output:
        user_text = text_input.value
        if user_text.lower() == 'exit':
            print("Chat ended.")
            return

        # The image is re-encoded on every submission and placed ahead of the question.
        image_part = {"image": {"uri": image_to_data_url(image_path)}}
        user_message = UserMessage(role="user", content=[image_part, user_text])

        print(f"\nUser> {user_text}")
        stream = client.inference.chat_completion(
            messages=[user_message],
            model="Llama3.2-11B-Vision-Instruct",
            stream=True,
        )

        print("Assistant> ", end='')
        async for event in EventLogger().log(stream):
            event.print()

        text_input.value = ''  # Clear input after sending


# Schedule the handler as an asyncio task each time the text box is submitted.
text_input.on_submit(lambda x: asyncio.create_task(on_submit(x)))

## Tool Calling

In this section, we'll explore how to enhance your applications with tool calling capabilities. We'll cover:
1. Setting up and using the Brave Search API
2. Creating custom tools
3. Configuring tool prompts and safety settings

In [None]:
import asyncio
import os
from typing import Dict, List, Optional
from dotenv import load_dotenv

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger
from llama_stack_client.types.agent_create_params import (
    AgentConfig,
    AgentConfigToolSearchToolDefinition,
)

# Load environment variables
load_dotenv()

# Helper function to create an agent with tools
async def create_tool_agent(
    client: LlamaStackClient,
    tools: List[Dict],
    instructions: str = "You are a helpful assistant",
    model: str = "Llama3.1-8B-Instruct",
) -> Agent:
    """Build an ``Agent`` configured with the given tools.

    Args:
        client: Client the agent is bound to.
        tools: Tool definitions, passed into the agent configuration as-is.
        instructions: Instructions text placed in the agent configuration.
        model: Identifier of the model the agent uses.

    Returns:
        An ``Agent`` constructed from ``client`` and the assembled configuration.
    """
    sampling = {
        "strategy": "greedy",
        "temperature": 1.0,
        "top_p": 0.9,
    }
    # The same shield is applied to both inputs and outputs.
    shield_id = "Llama-Guard-3-1B"

    config = AgentConfig(
        model=model,
        instructions=instructions,
        sampling_params=sampling,
        tools=tools,
        tool_choice="auto",
        tool_prompt_format="json",
        input_shields=[shield_id],
        output_shields=[shield_id],
        enable_session_persistence=True,
    )
    return Agent(client, config)

First, create a `.env` file in your notebook directory with your Brave Search API key:

```
BRAVE_SEARCH_API_KEY=your_key_here
```


In [None]:
async def create_search_agent(client: LlamaStackClient) -> Agent:
    """Build an agent whose only tool is Brave Search.

    The API key is read from the ``BRAVE_SEARCH_API_KEY`` environment variable.

    Args:
        client: Client the agent is bound to.

    Returns:
        The agent produced by ``create_tool_agent``.
    """
    brave_tool = AgentConfigToolSearchToolDefinition(
        type="brave_search",
        engine="brave",
        api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    )

    # Instructions asking the model to report findings followed by cited sources.
    research_prompt = """
        You are a research assistant that can search the web.
        Always cite your sources with URLs when providing information.
        Format your responses as:

        FINDINGS:
        [Your summary here]

        SOURCES:
        - [Source title](URL)
        """

    agent = await create_tool_agent(
        client=client,
        tools=[brave_tool],
        instructions=research_prompt,
    )
    return agent

# Example usage
async def search_example():
    """Run two sample questions through a search agent and print the logged events."""
    client = LlamaStackClient(base_url="http://localhost:8000")
    agent = await create_search_agent(client)

    # Every question below is asked within this one session.
    session_id = agent.create_session("search-session")

    sample_questions = (
        "What are the latest developments in quantum computing?",
        "Who won the most recent Super Bowl?",
    )

    for question in sample_questions:
        print(f"\nQuery: {question}")
        print("-" * 50)

        turn = agent.create_turn(
            messages=[{"role": "user", "content": question}],
            session_id=session_id,
        )
        async for event in EventLogger().log(turn):
            event.print()

# Run the example. Jupyter/IPython allows top-level `await` in a cell; in a
# plain Python script, use `asyncio.run(search_example())` instead.
await search_example()

## 3. Custom Tool Creation

Let's create a custom weather tool:

In [None]:
from typing import TypedDict, Optional
from datetime import datetime

# Define tool types
class _WeatherInputRequired(TypedDict):
    """Keys that every weather request must contain."""

    location: str


class WeatherInput(_WeatherInputRequired, total=False):
    """Structured input accepted by ``WeatherTool.__call__``.

    ``location`` is required. ``date`` is an optional key, matching how
    ``WeatherTool.__call__`` reads it with ``.get()``.
    """

    date: Optional[str]


class WeatherOutput(TypedDict):
    """Weather report returned by ``WeatherTool``."""

    temperature: float
    conditions: str
    humidity: float


class WeatherTool:
    """Example custom tool for weather information.

    The implementation is a mock: it returns the same fixed report for every
    location and date.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Store the API key; the mock implementation does not use it."""
        self.api_key = api_key

    async def get_weather(self, location: str, date: Optional[str] = None) -> WeatherOutput:
        """Simulate getting weather data (replace with actual API call).

        Args:
            location: City or location name (ignored by the mock).
            date: Optional date string (ignored by the mock).

        Returns:
            A fixed ``WeatherOutput`` dictionary.
        """
        # Mock implementation
        return {
            "temperature": 72.5,
            "conditions": "partly cloudy",
            "humidity": 65.0,
        }

    async def __call__(self, input_data: WeatherInput) -> WeatherOutput:
        """Make the tool callable with structured input.

        Args:
            input_data: Mapping with a ``location`` key and, optionally, ``date``.

        Returns:
            The result of ``get_weather`` for the given input.

        Raises:
            KeyError: If ``input_data`` has no ``location`` key.
        """
        return await self.get_weather(
            location=input_data["location"],
            date=input_data.get("date"),  # absent key is passed on as None
        )

async def create_weather_agent(client: LlamaStackClient) -> Agent:
    """Build an agent equipped with the ``get_weather`` function tool.

    Args:
        client: Client the agent is bound to.

    Returns:
        The agent produced by ``create_tool_agent``.
    """
    # JSON-schema style description of the arguments; only `location` is required.
    parameter_schema = {
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City or location name",
            },
            "date": {
                "type": "string",
                "description": "Optional date (YYYY-MM-DD)",
                "format": "date",
            },
        },
        "required": ["location"],
    }
    function_spec = {
        "name": "get_weather",
        "description": "Get weather information for a location",
        "parameters": parameter_schema,
    }
    weather_tool = {
        "type": "function",
        "function": function_spec,
        "implementation": WeatherTool(),
    }

    weather_prompt = """
        You are a weather assistant that can provide weather information.
        Always specify the location clearly in your responses.
        Include both temperature and conditions in your summaries.
        """

    return await create_tool_agent(
        client=client,
        tools=[weather_tool],
        instructions=weather_prompt,
    )

# Example usage
async def weather_example():
    """Run two sample weather questions through a weather agent and print the logged events."""
    client = LlamaStackClient(base_url="http://localhost:8000")
    agent = await create_weather_agent(client)

    # Both questions are asked within the same session.
    session_id = agent.create_session("weather-session")

    sample_questions = (
        "What's the weather like in San Francisco?",
        "Tell me the weather in Tokyo tomorrow",
    )

    for question in sample_questions:
        print(f"\nQuery: {question}")
        print("-" * 50)

        turn = agent.create_turn(
            messages=[{"role": "user", "content": question}],
            session_id=session_id,
        )
        async for event in EventLogger().log(turn):
            event.print()

# Run the example (top-level `await` works inside a Jupyter/IPython cell;
# a plain Python script would need `asyncio.run(weather_example())`)
await weather_example()

## Multi-Tool Agent

In [None]:
async def create_multi_tool_agent(client: LlamaStackClient) -> Agent:
    """Build an agent that has both the Brave Search tool and the weather function tool.

    Args:
        client: Client the agent is bound to.

    Returns:
        The agent produced by ``create_tool_agent``.
    """
    # Brave Search tool; the key comes from the BRAVE_SEARCH_API_KEY environment variable.
    brave_tool = AgentConfigToolSearchToolDefinition(
        type="brave_search",
        engine="brave",
        api_key=os.getenv("BRAVE_SEARCH_API_KEY"),
    )

    # Weather tool, described as a function with a required `location` argument.
    weather_function = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather information for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string"},
                    "date": {"type": "string", "format": "date"},
                },
                "required": ["location"],
            },
        },
        "implementation": WeatherTool(),
    }

    combined_prompt = """
        You are an assistant that can search the web and check weather information.
        Use the appropriate tool based on the user's question.
        For weather queries, always specify location and conditions.
        For web searches, always cite your sources.
        """

    return await create_tool_agent(
        client=client,
        tools=[brave_tool, weather_function],
        instructions=combined_prompt,
    )

# Interactive example with multi-tool agent
async def interactive_multi_tool():
    """Run a console chat loop against the multi-tool agent.

    Reads questions with ``input()`` until the user types ``exit``
    (case-insensitive, surrounding whitespace ignored). Blank input is
    skipped. Each question is sent as one turn in a single session and the
    logged events are printed. An error during a turn is printed and the loop
    continues.
    """
    client = LlamaStackClient(base_url="http://localhost:8000")
    agent = await create_multi_tool_agent(client)
    session_id = agent.create_session("interactive-session")

    print("🤖 Multi-tool Agent Ready! (type 'exit' to quit)")
    print("Example questions:")
    print("- What's the weather in Paris and what events are happening there?")
    print("- Tell me about recent space discoveries and the weather on Mars")

    while True:
        query = input("\nYour question: ")
        command = query.strip().lower()
        if command == 'exit':
            break
        if not command:
            # Nothing was typed; ask again instead of sending an empty turn.
            continue

        print("\nThinking...")
        try:
            response = agent.create_turn(
                messages=[{"role": "user", "content": query}],
                session_id=session_id,
            )

            async for log in EventLogger().log(response):
                log.print()
        except Exception as e:
            # Deliberately broad: a failed turn is reported and the chat keeps going.
            print(f"Error: {e}")

# Run interactive example (top-level `await` works inside a Jupyter/IPython
# cell; the loop reads questions via input() until 'exit' is typed)
await interactive_multi_tool()

## Memory 

Getting Started with Memory API Tutorial 🚀
Welcome! This interactive tutorial will guide you through using the Memory API, a powerful tool for document storage and retrieval. Whether you're new to vector databases or an experienced developer, this notebook will help you understand the basics and get up and running quickly.
What you'll learn:

How to set up and configure the Memory API client
Creating and managing memory banks (vector stores)
Different ways to insert documents into the system
How to perform intelligent queries on your documents

Prerequisites:

Basic Python knowledge
A running instance of the Memory API server (we'll use localhost in this tutorial)

Let's start by installing the required packages:

In [None]:
# Install the client library and a helper package for colored output
!pip install llama-stack-client termcolor

# 💡 Note: If you're running this in a new environment, you might need to restart
# your kernel after installation

1. Initial Setup
First, we'll import the necessary libraries and set up some helper functions. Let's break down what each import does:

llama_stack_client: Our main interface to the Memory API
base64: Helps us encode files for transmission
mimetypes: Determines file types automatically
termcolor: Makes our output prettier with colors

❓ Question: Why do we need to convert files to data URLs?
Answer: Data URLs allow us to embed file contents directly in our requests, making it easier to transmit files to the API without needing separate file uploads.

In [None]:
import base64
import json
import mimetypes
import os
from pathlib import Path

from llama_stack_client import LlamaStackClient
from llama_stack_client.types.memory_insert_params import Document
from termcolor import cprint

# Helper function to convert files to data URLs
def data_url_from_file(file_path: str) -> str:
    """Convert a file to a base64 data URL for API transmission.

    Args:
        file_path (str): Path to the file to convert

    Returns:
        str: Data URL of the form ``data:<mime type>;base64,<payload>``. When
        the MIME type cannot be guessed from the file name, the generic
        ``application/octet-stream`` is used.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.

    Example:
        >>> url = data_url_from_file('example.txt')  # doctest: +SKIP
        >>> print(url)  # for a file containing b"Hello"  # doctest: +SKIP
        data:text/plain;base64,SGVsbG8=
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, "rb") as file:
        file_content = file.read()

    base64_content = base64.b64encode(file_content).decode("utf-8")

    # guess_type() returns None for unknown extensions; without a fallback the
    # URL would start with the literal text "data:None".
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "application/octet-stream"

    return f"data:{mime_type};base64,{base64_content}"

2. Initialize Client and Create Memory Bank
Now we'll set up our connection to the Memory API and create our first memory bank. A memory bank is like a specialized database that stores document embeddings for semantic search.
❓ Key Concepts:

embedding_model: The model used to convert text into vector representations
chunk_size: How large each piece of text should be when splitting documents
overlap_size: How much overlap between chunks (helps maintain context)

✨ Pro Tip: Choose your chunk size based on your use case. Smaller chunks (256-512 tokens) are better for precise retrieval, while larger chunks (1024+ tokens) maintain more context.

In [None]:
# Configure connection parameters
HOST = "localhost"  # Replace with your host if using a remote server
PORT = 8000        # Replace with your port if different

# Initialize client
client = LlamaStackClient(
    base_url=f"http://{HOST}:{PORT}",
)

# Let's see what providers are available
# Providers determine where and how your data is stored
# NOTE(review): the code below JSON-dumps `providers` and also reads
# providers["memory"][0].provider_id (attribute access on an entry). That
# assumes the return value is a JSON-serializable mapping keyed by API name
# whose entries expose `provider_id` -- confirm against the installed client.
providers = client.providers.list()
print("Available providers:")
print(json.dumps(providers, indent=2))

# Create a memory bank with optimized settings for general use
client.memory_banks.register(
    memory_bank={
        "identifier": "tutorial_bank",  # A unique name for your memory bank
        "embedding_model": "all-MiniLM-L6-v2",  # A lightweight but effective model
        "chunk_size_in_tokens": 512,  # Good balance between precision and context
        "overlap_size_in_tokens": 64,  # Helps maintain context between chunks
        "provider_id": providers["memory"][0].provider_id,  # Use the first available provider
    }
)

# Let's verify our memory bank was created by listing all registered banks
memory_banks = client.memory_banks.list()
print("\nRegistered memory banks:")
print(json.dumps(memory_banks, indent=2))

# 🎯 Exercise: Try creating another memory bank with different settings!
# What happens if you try to create a bank with the same identifier?

3. Insert Documents
The Memory API supports multiple ways to add documents. We'll demonstrate two common approaches:

Loading documents from URLs
Loading documents from local files

❓ Important Concepts:

Each document needs a unique document_id
Metadata helps organize and filter documents later
The API automatically processes and chunks documents

In [None]:
# File names of example documentation pages; the full URL for each one is
# built in the comprehension below.
# 💡 Replace these with your own files/URLs or use the examples
urls = [
    "memory_optimizations.rst",
    "chat.rst",
    "llama3.rst",
]

# Create documents from URLs
# Here `content` is the URL string itself, not the downloaded text.
# We add metadata to help organize our documents
url_documents = [
    Document(
        document_id=f"url-doc-{i}",  # Unique ID for each document
        content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
        mime_type="text/plain",
        metadata={"source": "url", "filename": url},  # Metadata helps with organization
    )
    for i, url in enumerate(urls)
]

# Example with local files
# 💡 Replace these with your actual files
# Files that do not exist are skipped silently, so this list may end up empty.
local_files = ["example.txt", "readme.md"]
file_documents = [
    Document(
        document_id=f"file-doc-{i}",
        content=data_url_from_file(path),  # File contents embedded as a data URL
        metadata={"source": "local", "filename": path},
    )
    for i, path in enumerate(local_files)
    if os.path.exists(path)
]

# Combine all documents
all_documents = url_documents + file_documents

# Insert documents into memory bank
response = client.memory.insert(
    bank_id="tutorial_bank",
    documents=all_documents,
)

print("Documents inserted successfully!")

# 🎯 Exercise: Try adding your own documents!
# - What happens if you try to insert a document with an existing ID?
# - What other metadata might be useful to add?

4. Query the Memory Bank
Now for the exciting part - querying our documents! The Memory API uses semantic search to find relevant content based on meaning, not just keywords.
❓ Understanding Scores:

Scores range from 0 to 1, with 1 being the most relevant
Generally, scores above 0.7 indicate strong relevance
Consider your use case when deciding on score thresholds

In [None]:
def print_query_results(query: str):
    """Run one query against ``tutorial_bank`` and print each returned chunk with its score.

    Args:
        query (str): The search query to execute
    """
    print(f"\nQuery: {query}")
    print("-" * 50)

    result = client.memory.query(
        bank_id="tutorial_bank",
        query=[query],  # The API accepts multiple queries at once!
    )

    divider = "=" * 40
    # Chunks and scores are paired by position; numbering starts at 1.
    for rank, (chunk, score) in enumerate(zip(result.chunks, result.scores), start=1):
        print(f"\nResult {rank} (Score: {score:.3f})")
        print(divider)
        print(chunk)
        print(divider)

# Let's try some example queries; each is run and printed in turn
queries = [
    "How do I use LoRA?",  # Technical question
    "Tell me about memory optimizations",  # General topic
    "What are the key features of Llama 3?"  # Product-specific
]

for query in queries:
    print_query_results(query)

# 🎯 Exercises:
# 1. Try writing your own queries! What works well? What doesn't?
# 2. How do different phrasings of the same question affect results?
# 3. What happens if you query for content that isn't in your documents?

5. Advanced Usage: Query with Metadata Filtering
One powerful feature is the ability to filter results based on metadata. This helps when you want to search within specific subsets of your documents.
❓ Use Cases for Metadata Filtering:

Search within specific document types
Filter by date ranges
Limit results to certain authors or sources

In [None]:
# Query with metadata filter
# The filter value matches the "source" metadata set on documents at insert time.
response = client.memory.query(
    bank_id="tutorial_bank",
    query=["Tell me about optimization"],
    metadata_filter={"source": "url"}  # Only search in URL documents
)

# Print each returned chunk with its score, paired by position.
print("\nFiltered Query Results:")
print("-" * 50)
for chunk, score in zip(response.chunks, response.scores):
    print(f"Score: {score:.3f}")
    print(f"Chunk:\n{chunk}\n")

# 🎯 Advanced Exercises:
# 1. Try combining multiple metadata filters
# 2. Compare results with and without filters
# 3. What happens with non-existent metadata fields?