In [4]:
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import Document
from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.lib.agents.agent import Agent
from rich.pretty import pprint
import json
import uuid
from pydantic import BaseModel
import rich
import os

In [11]:
# Model identifier handed to the agent below.
MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct"

# Client pointed at a server on localhost:8321. The Fireworks API key is read
# from the environment and forwarded as provider data; indexing os.environ
# raises KeyError if FIREWORKS_API_KEY is not set, so a missing key fails here
# rather than at request time.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={"fireworks_api_key": os.environ["FIREWORKS_API_KEY"]},
)

# File names of the torchtune tutorial pages to attach; each one is appended
# to the raw GitHub URL prefix when the attachments are built.
urls = [
    "memory_optimizations.rst",
    "chat.rst",
    "llama3.rst",
    "datasets.rst",
    "qat_finetune.rst",
    "lora_finetune.rst",
]

# One attachment dict per tutorial page: "content" holds the page URL and
# "mime_type" marks it as plain text.
attachments = [
    {
        "content": f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}",
        "mime_type": "text/plain",
    }
    for url in urls
]

# Agent bound to the client and model above, with a fixed system instruction.
simple_agent = Agent(
    client,
    model=MODEL_ID,
    instructions="You are a helpful assistant that can answer questions about the Torchtune project.",
)

In [12]:
# Open a new session on the agent; the random UUID suffix gives each run a
# distinct session name.
simple_session_id = simple_agent.create_session(
    session_name=f"simple_session_{uuid.uuid4()}"
)

# The single user message sent in this turn.
turn_messages = [
    {"role": "user", "content": "What precision formats does torchtune support?"},
]

# Run one non-streaming turn, passing the attachments as the turn's documents.
response = simple_agent.create_turn(
    messages=turn_messages,
    documents=attachments,
    session_id=simple_session_id,
    stream=False,
)
pprint(response)

# Retrieve the session through the client and pretty-print what comes back.
session_response = client.agents.session.retrieve(
    agent_id=simple_agent.agent_id,
    session_id=simple_session_id,
)
pprint(session_response)