{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.types import Document\n", "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from rich.pretty import pprint\n", "import json\n", "import uuid\n", "from pydantic import BaseModel\n", "import rich\n", "import os" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "MODEL_ID = \"meta-llama/Llama-3.3-70B-Instruct\"\n", "\n", "client = LlamaStackClient(\n", " base_url=\"http://localhost:8321\",\n", " provider_data={\n", " \"fireworks_api_key\": os.environ[\"FIREWORKS_API_KEY\"]\n", " }\n", ")\n", "\n", "urls = [\n", " \"memory_optimizations.rst\",\n", " \"chat.rst\",\n", " \"llama3.rst\",\n", " \"datasets.rst\",\n", " \"qat_finetune.rst\",\n", " \"lora_finetune.rst\",\n", "]\n", "\n", "attachments = [\n", " {\n", " \"content\": f\"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}\",\n", " \"mime_type\": \"text/plain\",\n", " }\n", "\n", " for i, url in enumerate(urls)\n", "]\n", "\n", "simple_agent = Agent(client, model=MODEL_ID, \n", " instructions=\"You are a helpful assistant that can answer questions about the Torchtune project.\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Turn(\n", "│ input_messages=[\n", "│ │ UserMessage(content='What precision formats does torchtune support?', role='user', context=None)\n", "│ ],\n", "│ output_message=CompletionMessage(\n", "│ │ content='Torchtune supports the following precision formats:\\n\\n* FP32 (32-bit floating point)\\n* FP16 (16-bit floating point)\\n* INT8 (8-bit integer)\\n* BF16 (Brain Floating Point 16, a 16-bit floating point format)\\n\\nThese precision formats can be used for model weights, activations, and gradients, allowing for flexible and efficient tuning of models for various hardware and performance requirements.',\n", "│ │ role='assistant',\n", "│ │ stop_reason='end_of_turn',\n", "│ │ tool_calls=[]\n", "│ ),\n", "│ session_id='1c23c79b-3945-4e99-bda6-7922b6b4e91c',\n", "│ started_at=datetime.datetime(2025, 3, 20, 22, 41, 0, 175804, tzinfo=TzInfo(UTC)),\n", "│ steps=[\n", "│ │ InferenceStep(\n", "│ │ │ api_model_response=CompletionMessage(\n", "│ │ │ │ content='Torchtune supports the following precision formats:\\n\\n* FP32 (32-bit floating point)\\n* FP16 (16-bit floating point)\\n* INT8 (8-bit integer)\\n* BF16 (Brain Floating Point 16, a 16-bit floating point format)\\n\\nThese precision formats can be used for model weights, activations, and gradients, allowing for flexible and efficient tuning of models for various hardware and performance requirements.',\n", "│ │ │ │ role='assistant',\n", "│ │ │ │ stop_reason='end_of_turn',\n", "│ │ │ │ tool_calls=[]\n", "│ │ │ ),\n", "│ │ │ step_id='bf452f18-8fae-470e-9e97-b1af60628fc1',\n", "│ │ │ step_type='inference',\n", "│ │ │ turn_id='efb92c6d-d482-4dd2-ad4b-3250c1e9a231',\n", "│ │ │ completed_at=datetime.datetime(2025, 3, 20, 22, 41, 1, 618765, tzinfo=TzInfo(UTC)),\n", "│ │ │ started_at=datetime.datetime(2025, 3, 20, 22, 41, 0, 175855, tzinfo=TzInfo(UTC))\n", "│ │ )\n", "│ ],\n", "│ turn_id='efb92c6d-d482-4dd2-ad4b-3250c1e9a231',\n", "│ completed_at=datetime.datetime(2025, 3, 20, 22, 41, 1, 631357, tzinfo=TzInfo(UTC)),\n", "│ 
output_attachments=[]\n", ")\n", "\n" ], "text/plain": [ "\u001b[1;35mTurn\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[33minput_messages\u001b[0m=\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1;35mUserMessage\u001b[0m\u001b[1m(\u001b[0m\u001b[33mcontent\u001b[0m=\u001b[32m'What precision formats does torchtune support?'\u001b[0m, \u001b[33mrole\u001b[0m=\u001b[32m'user'\u001b[0m, \u001b[33mcontext\u001b[0m=\u001b[3;35mNone\u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33moutput_message\u001b[0m=\u001b[1;35mCompletionMessage\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mcontent\u001b[0m=\u001b[32m'Torchtune supports the following precision formats:\\n\\n* FP32 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m32-bit floating point\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* FP16 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m16-bit floating point\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* INT8 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m8-bit integer\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* BF16 \u001b[0m\u001b[32m(\u001b[0m\u001b[32mBrain Floating Point 16, a 16-bit floating point format\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\nThese precision formats can be used for model weights, activations, and gradients, allowing for flexible and efficient tuning of models for various hardware and performance requirements.'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mrole\u001b[0m=\u001b[32m'assistant'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mstop_reason\u001b[0m=\u001b[32m'end_of_turn'\u001b[0m,\n", "\u001b[2;32m│ │ \u001b[0m\u001b[33mtool_calls\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33msession_id\u001b[0m=\u001b[32m'1c23c79b-3945-4e99-bda6-7922b6b4e91c'\u001b[0m,\n", "\u001b[2;32m│ 
\u001b[0m\u001b[33mstarted_at\u001b[0m=\u001b[1;35mdatetime\u001b[0m\u001b[1;35m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m41\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m175804\u001b[0m, \u001b[33mtzinfo\u001b[0m=\u001b[1;35mTzInfo\u001b[0m\u001b[1m(\u001b[0mUTC\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33msteps\u001b[0m=\u001b[1m[\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1;35mInferenceStep\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mapi_model_response\u001b[0m=\u001b[1;35mCompletionMessage\u001b[0m\u001b[1m(\u001b[0m\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mcontent\u001b[0m=\u001b[32m'Torchtune supports the following precision formats:\\n\\n* FP32 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m32-bit floating point\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* FP16 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m16-bit floating point\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* INT8 \u001b[0m\u001b[32m(\u001b[0m\u001b[32m8-bit integer\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n* BF16 \u001b[0m\u001b[32m(\u001b[0m\u001b[32mBrain Floating Point 16, a 16-bit floating point format\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\nThese precision formats can be used for model weights, activations, and gradients, allowing for flexible and efficient tuning of models for various hardware and performance requirements.'\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mrole\u001b[0m=\u001b[32m'assistant'\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mstop_reason\u001b[0m=\u001b[32m'end_of_turn'\u001b[0m,\n", "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mtool_calls\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mstep_id\u001b[0m=\u001b[32m'bf452f18-8fae-470e-9e97-b1af60628fc1'\u001b[0m,\n", "\u001b[2;32m│ │ │ 
\u001b[0m\u001b[33mstep_type\u001b[0m=\u001b[32m'inference'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mturn_id\u001b[0m=\u001b[32m'efb92c6d-d482-4dd2-ad4b-3250c1e9a231'\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mcompleted_at\u001b[0m=\u001b[1;35mdatetime\u001b[0m\u001b[1;35m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m41\u001b[0m, \u001b[1;36m1\u001b[0m, \u001b[1;36m618765\u001b[0m, \u001b[33mtzinfo\u001b[0m=\u001b[1;35mTzInfo\u001b[0m\u001b[1m(\u001b[0mUTC\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mstarted_at\u001b[0m=\u001b[1;35mdatetime\u001b[0m\u001b[1;35m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m41\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m175855\u001b[0m, \u001b[33mtzinfo\u001b[0m=\u001b[1;35mTzInfo\u001b[0m\u001b[1m(\u001b[0mUTC\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m\n", "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mturn_id\u001b[0m=\u001b[32m'efb92c6d-d482-4dd2-ad4b-3250c1e9a231'\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33mcompleted_at\u001b[0m=\u001b[1;35mdatetime\u001b[0m\u001b[1;35m.datetime\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m2025\u001b[0m, \u001b[1;36m3\u001b[0m, \u001b[1;36m20\u001b[0m, \u001b[1;36m22\u001b[0m, \u001b[1;36m41\u001b[0m, \u001b[1;36m1\u001b[0m, \u001b[1;36m631357\u001b[0m, \u001b[33mtzinfo\u001b[0m=\u001b[1;35mTzInfo\u001b[0m\u001b[1m(\u001b[0mUTC\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m,\n", "\u001b[2;32m│ \u001b[0m\u001b[33moutput_attachments\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\n", "\u001b[1m)\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "simple_session_id = 
simple_agent.create_session(session_name=f\"simple_session_{uuid.uuid4()}\")\n", "\n", "# Ask one question in this session; stream=False returns the completed Turn object\n", "# (rather than an event stream), so it can be pretty-printed directly.\n", "response = simple_agent.create_turn(\n", "    messages=[\n", "        {\n", "            \"role\": \"user\",\n", "            \"content\": \"What precision formats does torchtune support?\"\n", "        }\n", "    ],\n", "    session_id=simple_session_id,\n", "    stream=False\n", ")\n", "\n", "pprint(response)\n", "\n", "# Fetch the stored session back from the server to confirm the turn was persisted.\n", "session_response = client.agents.session.retrieve(agent_id=simple_agent.agent_id, session_id=simple_session_id)\n", "pprint(session_response)" ] } ], "metadata": { "kernelspec": { "display_name": "master", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.16" } }, "nbformat": 4, "nbformat_minor": 2 }