update to use AsyncLlamaStackClient, /v1/health and drop EventLogger

Matthew Farrellee 2025-06-30 13:27:38 -04:00
parent a325b38008
commit 417677814c

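Taken together, the hunks below move the notebook from the synchronous `LlamaStackClient` to `AsyncLlamaStackClient`, probe the versioned `/v1/health` endpoint instead of `/health`, and print streamed chunks directly instead of routing them through `EventLogger`. A minimal end-to-end sketch of the resulting pattern outside the notebook (the base URL, port, and model name here are illustrative, not taken from the diff):

    import asyncio

    import httpx
    from llama_stack_client import AsyncLlamaStackClient

    async def main() -> None:
        # Assumed server address; the notebook builds its URLs from HOST/PORT variables.
        client = AsyncLlamaStackClient(base_url='http://localhost:8321')

        # Probe the versioned health endpoint before issuing requests.
        async with httpx.AsyncClient() as http_client:
            resp = await http_client.get(f'{client.base_url}/v1/health')
            resp.raise_for_status()

        # Stream a chat completion and print deltas as they arrive.
        response = await client.inference.chat_completion(
            messages=[{'role': 'user', 'content': 'hello'}],
            model_id='meta-llama/Llama3.2-11B-Vision-Instruct',
            stream=True,
        )
        async for chunk in response:
            print(chunk.event.delta.text, end='', flush=True)
        print()

    asyncio.run(main())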

@@ -26,7 +26,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "d80c0926",
    "metadata": {},
    "outputs": [],
@@ -48,16 +48,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "7f868dfe",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_stack_client import LlamaStackClient\n",
+    "from llama_stack_client import AsyncLlamaStackClient\n",
     "\n",
     "# Configure local and cloud clients\n",
-    "local_client = LlamaStackClient(base_url=f'http://{HOST}:{LOCAL_PORT}')\n",
-    "cloud_client = LlamaStackClient(base_url=f'http://{HOST}:{CLOUD_PORT}')"
+    "local_client = AsyncLlamaStackClient(base_url=f'http://{HOST}:{LOCAL_PORT}')\n",
+    "cloud_client = AsyncLlamaStackClient(base_url=f'http://{HOST}:{CLOUD_PORT}')"
    ]
   },
   {
@@ -67,23 +67,15 @@
    "source": [
     "#### 3. Client Selection with Fallback\n",
     "\n",
-    "The `select_client` function checks if the local client is available using a lightweight `/health` check. If the local client is unavailable, it automatically switches to the cloud client.\n"
+    "The `select_client` function checks if the local client is available using a lightweight `/v1/health` check. If the local client is unavailable, it automatically switches to the cloud client.\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "ff0c8277",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mUsing local client.\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import httpx\n",
     "from termcolor import cprint\n",
@@ -91,7 +83,7 @@
     "async def check_client_health(client, client_name: str) -> bool:\n",
     "    try:\n",
     "        async with httpx.AsyncClient() as http_client:\n",
-    "            response = await http_client.get(f'{client.base_url}/health')\n",
+    "            response = await http_client.get(f'{client.base_url}/v1/health')\n",
     "            if response.status_code == 200:\n",
     "                cprint(f'Using {client_name} client.', 'yellow')\n",
     "                return True\n",
@@ -102,7 +94,7 @@
     "        cprint(f'Failed to connect to {client_name} client.', 'red')\n",
     "        return False\n",
     "\n",
-    "async def select_client(use_local: bool) -> LlamaStackClient:\n",
+    "async def select_client(use_local: bool) -> AsyncLlamaStackClient:\n",
     "    if use_local and await check_client_health(local_client, 'local'):\n",
     "        return local_client\n",
     "\n",
@@ -127,13 +119,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "5e19cc20",
    "metadata": {},
    "outputs": [],
    "source": [
     "from termcolor import cprint\n",
-    "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
     "\n",
     "async def get_llama_response(stream: bool = True, use_local: bool = True):\n",
     "    client = await select_client(use_local) # Selects the available client\n",
@@ -143,17 +134,19 @@
     "    }\n",
     "    cprint(f'User> {message[\"content\"]}', 'green')\n",
     "\n",
-    "    response = client.inference.chat_completion(\n",
+    "    response = await client.inference.chat_completion(\n",
     "        messages=[message],\n",
-    "        model='Llama3.2-11B-Vision-Instruct',\n",
+    "        model_id='meta-llama/Llama3.2-11B-Vision-Instruct',\n",
     "        stream=stream,\n",
     "    )\n",
     "\n",
+    "    cprint(f'Assistant> ', color='cyan', end='')\n",
     "    if not stream:\n",
-    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
+    "        cprint(response.completion_message.content, color='yellow')\n",
     "    else:\n",
-    "        async for log in EventLogger().log(response):\n",
-    "            log.print()\n"
+    "        async for chunk in response:\n",
+    "            cprint(chunk.event.delta.text, color='yellow', end='')\n",
+    "        cprint('')"
    ]
   },
   {
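With `EventLogger` gone, the streaming branch consumes the async iterator returned by `chat_completion(stream=True)` directly, pulling each delta from `chunk.event.delta.text`. The same loop reduced to its essentials, minus the notebook's coloring (a sketch, not part of the diff):

    async def print_stream(response) -> None:
        # response: async iterator from chat_completion(..., stream=True)
        async for chunk in response:
            print(chunk.event.delta.text, end='', flush=True)
        print()  # terminate the line once the stream is exhausted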
@@ -168,21 +161,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "c10f487e",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mUsing cloud client.\u001b[0m\n",
-      "\u001b[32mUser> hello world, write me a 2 sentence poem about the moon\u001b[0m\n",
-      "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m cres\u001b[0m\u001b[33mcent\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m midnight\u001b[0m\u001b[33m sky\u001b[0m\u001b[33m,\n",
-      "\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m that\u001b[0m\u001b[33m whispers\u001b[0m\u001b[33m,\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mI\u001b[0m\u001b[33m'm\u001b[0m\u001b[33m passing\u001b[0m\u001b[33m by\u001b[0m\u001b[33m.\"\u001b[0m\u001b[97m\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import asyncio\n",
     "\n",
@@ -203,21 +185,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "02eacfaf-c7f1-494b-ac28-129d2a0258e3",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[33mUsing local client.\u001b[0m\n",
-      "\u001b[32mUser> hello world, write me a 2 sentence poem about the moon\u001b[0m\n",
-      "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m cres\u001b[0m\u001b[33mcent\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m midnight\u001b[0m\u001b[33m sky\u001b[0m\u001b[33m,\n",
-      "\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m that\u001b[0m\u001b[33m whispers\u001b[0m\u001b[33m,\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mI\u001b[0m\u001b[33m'm\u001b[0m\u001b[33m passing\u001b[0m\u001b[33m by\u001b[0m\u001b[33m.\"\u001b[0m\u001b[97m\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import asyncio\n",
     "\n",
@@ -233,6 +204,12 @@
     "\n",
     "The next one will be a guide on [Prompt Engineering](./02_Prompt_Engineering101.ipynb), please continue learning!"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ad6db48",
+   "metadata": {},
+   "source": []
   }
  ],
  "metadata": {