mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-30 07:39:38 +00:00
doc enhancement for the inference, prompt engineer, local-cloud
This commit is contained in:
parent
3c707e0a05
commit
e746f741d1
3 changed files with 308 additions and 124 deletions
|
@ -42,7 +42,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "38a39e44",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -52,11 +52,9 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d1d097ab",
|
||||
"cell_type": "markdown",
|
||||
"id": "7dacaa2d-94e9-42e9-82a0-73522dfc7010",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### 1. Set Up the Client\n",
|
||||
"\n",
|
||||
|
@ -65,7 +63,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "7a573752",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -73,7 +71,7 @@
|
|||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.types import SystemMessage, UserMessage\n",
|
||||
"\n",
|
||||
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')"
|
||||
"client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -88,10 +86,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "77c29dba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"A gentle llama roams the land,\n",
|
||||
"With soft fur and a gentle hand.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = client.inference.chat_completion(\n",
|
||||
" messages=[\n",
|
||||
|
@ -123,10 +130,19 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "5c6812da",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"O, fairest llama, with thy softest fleece,\n",
|
||||
"Thy gentle eyes, like sapphires, in serenity do cease.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"response = client.inference.chat_completion(\n",
|
||||
" messages=[\n",
|
||||
|
@ -151,17 +167,48 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "02211625",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> Write me a 3 sentence poem about alpaca\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: Softly grazing, gentle soul,\n",
|
||||
"Alpaca's fleece, a treasure whole,\n",
|
||||
"In Andean fields, they softly roll.\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> exit\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.types import UserMessage\n",
|
||||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
|
||||
"client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
|
||||
"\n",
|
||||
"async def chat_loop():\n",
|
||||
" while True:\n",
|
||||
|
@ -177,7 +224,10 @@
|
|||
" )\n",
|
||||
" cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
|
||||
"\n",
|
||||
"asyncio.run(chat_loop())"
|
||||
"# Run the chat loop in a Jupyter Notebook cell using `await`\n",
|
||||
"await chat_loop()\n",
|
||||
"# To run it in a python file, use this line instead\n",
|
||||
"# asyncio.run(chat_loop())"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -192,10 +242,69 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "9496f75c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> what is 1+1\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: 1 + 1 = 2\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> what is llama + alpaca\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: That's a creative and imaginative question. However, since llamas and alpacas are animals, not numbers, we can't perform a mathematical operation on them.\n",
|
||||
"\n",
|
||||
"But if we were to interpret this as a creative or humorous question, we could say that the result of \"llama + alpaca\" is a fun and fuzzy bundle of South American camelid cuteness!\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> what was the first question\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: The first question was \"what is 1+1\"\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdin",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"User> exit\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"async def chat_loop():\n",
|
||||
" conversation_history = []\n",
|
||||
|
@ -217,7 +326,10 @@
|
|||
" assistant_message = UserMessage(content=response.completion_message.content, role='user')\n",
|
||||
" conversation_history.append(assistant_message)\n",
|
||||
"\n",
|
||||
"asyncio.run(chat_loop())"
|
||||
"# Use `await` in the Jupyter Notebook cell to call the function\n",
|
||||
"await chat_loop()\n",
|
||||
"# To run it in a python file, use this line instead\n",
|
||||
"# asyncio.run(chat_loop())"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -234,10 +346,21 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "d119026e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[32mUser> Write me a 3 sentence poem about llama\u001b[0m\n",
|
||||
"\u001b[36mAssistant> \u001b[0m\u001b[33mSoft\u001b[0m\u001b[33mly\u001b[0m\u001b[33m padded\u001b[0m\u001b[33m feet\u001b[0m\u001b[33m on\u001b[0m\u001b[33m the\u001b[0m\u001b[33m ground\u001b[0m\u001b[33m,\n",
|
||||
"\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m's\u001b[0m\u001b[33m peaceful\u001b[0m\u001b[33m sound\u001b[0m\u001b[33m,\n",
|
||||
"\u001b[0m\u001b[33mF\u001b[0m\u001b[33murry\u001b[0m\u001b[33m coat\u001b[0m\u001b[33m and\u001b[0m\u001b[33m calm\u001b[0m\u001b[33m,\u001b[0m\u001b[33m serene\u001b[0m\u001b[33m eyes\u001b[0m\u001b[33m all\u001b[0m\u001b[33m around\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
|
@ -246,12 +369,12 @@
|
|||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
"async def run_main(stream: bool = True):\n",
|
||||
" client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
|
||||
" client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
|
||||
"\n",
|
||||
" message = UserMessage(\n",
|
||||
" content='hello world, write me a 2 sentence poem about the moon', role='user'\n",
|
||||
" content='Write me a 3 sentence poem about llama', role='user'\n",
|
||||
" )\n",
|
||||
" print(f'User>{message.content}', 'green')\n",
|
||||
" cprint(f'User> {message.content}', 'green')\n",
|
||||
"\n",
|
||||
" response = client.inference.chat_completion(\n",
|
||||
" messages=[message],\n",
|
||||
|
@ -260,22 +383,37 @@
|
|||
" )\n",
|
||||
"\n",
|
||||
" if not stream:\n",
|
||||
" cprint(f'> Response: {response}', 'cyan')\n",
|
||||
" cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
|
||||
" else:\n",
|
||||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" models_response = client.models.list()\n",
|
||||
" print(models_response)\n",
|
||||
"\n",
|
||||
"if __name__ == '__main__':\n",
|
||||
" asyncio.run(run_main())"
|
||||
"# In a Jupyter Notebook cell, use `await` to call the function\n",
|
||||
"await run_main()\n",
|
||||
"# To run it in a python file, use this line instead\n",
|
||||
"# asyncio.run(run_main())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -26,13 +26,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "d80c0926",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"HOST = \"localhost\" # Replace with your host\n",
|
||||
"PORT = 5000 # Replace with your port"
|
||||
"LOCAL_PORT = 5000 # Replace with your local distro port\n",
|
||||
"CLOUD_PORT = 5001 # Replace with your cloud distro port"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -47,7 +48,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "7f868dfe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -55,8 +56,8 @@
|
|||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"\n",
|
||||
"# Configure local and cloud clients\n",
|
||||
"local_client = LlamaStackClient(base_url='http://{HOST}:{LOCAL_PORT}')\n",
|
||||
"cloud_client = LlamaStackClient(base_url='http://{HOST}:{CLOUD_PORT}')"
|
||||
"local_client = LlamaStackClient(base_url=f'http://{HOST}:{LOCAL_PORT}')\n",
|
||||
"cloud_client = LlamaStackClient(base_url=f'http://{HOST}:{CLOUD_PORT}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -71,26 +72,47 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "ff0c8277",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mUsing local client.\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import httpx\n",
|
||||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
"async def select_client() -> LlamaStackClient:\n",
|
||||
" \"\"\"Use local client if available; otherwise, switch to cloud client.\"\"\"\n",
|
||||
" try:\n",
|
||||
" async with httpx.AsyncClient() as http_client:\n",
|
||||
" response = await http_client.get(f'{local_client.base_url}/health')\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" cprint('Using local client.', 'yellow')\n",
|
||||
" return local_client\n",
|
||||
" except httpx.RequestError:\n",
|
||||
" pass\n",
|
||||
" cprint('Local client unavailable. Switching to cloud client.', 'yellow')\n",
|
||||
" return cloud_client"
|
||||
"async def select_client(use_local: bool) -> LlamaStackClient:\n",
|
||||
" \"\"\"\n",
|
||||
" Selects the client based on the use_local flag.\n",
|
||||
" \n",
|
||||
" Parameters:\n",
|
||||
" - use_local: bool, True to try the local client, False to use the cloud client.\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" - LlamaStackClient: the selected client instance.\n",
|
||||
" \"\"\"\n",
|
||||
" if use_local:\n",
|
||||
" try:\n",
|
||||
" async with httpx.AsyncClient() as http_client:\n",
|
||||
" response = await http_client.get(f'{local_client.base_url}/health')\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" cprint('Using local client.', 'yellow')\n",
|
||||
" return local_client\n",
|
||||
" except httpx.RequestError:\n",
|
||||
" cprint('Failed to connect to local client.', 'red')\n",
|
||||
"\n",
|
||||
" cprint('Using cloud client.', 'yellow')\n",
|
||||
" return cloud_client\n",
|
||||
"\n",
|
||||
"# Example usage: pass True for local, False for cloud\n",
|
||||
"client = await select_client(use_local=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -105,15 +127,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "5e19cc20",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client.types import UserMessage\n",
|
||||
"from termcolor import cprint\n",
|
||||
"from llama_stack_client.lib.inference.event_logger import EventLogger\n",
|
||||
"\n",
|
||||
"async def get_llama_response(stream: bool = True):\n",
|
||||
" client = await select_client() # Selects the available client\n",
|
||||
"async def get_llama_response(stream: bool = True, use_local: bool = True):\n",
|
||||
" client = await select_client(use_local) # Selects the available client\n",
|
||||
" message = UserMessage(content='hello world, write me a 2 sentence poem about the moon', role='user')\n",
|
||||
" cprint(f'User> {message.content}', 'green')\n",
|
||||
"\n",
|
||||
|
@ -124,11 +148,10 @@
|
|||
" )\n",
|
||||
"\n",
|
||||
" if not stream:\n",
|
||||
" cprint(f'> Response: {response}', 'cyan')\n",
|
||||
" cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
|
||||
" else:\n",
|
||||
" # Stream tokens progressively\n",
|
||||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()"
|
||||
" log.print()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -136,82 +159,67 @@
|
|||
"id": "6edf5e57",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 4. Run the Asynchronous Response Generation\n",
|
||||
"#### 4. Run with Cloud Model\n",
|
||||
"\n",
|
||||
"Use `asyncio.run()` to execute `get_llama_response` in an asynchronous event loop.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 7,
|
||||
"id": "c10f487e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mUsing cloud client.\u001b[0m\n",
|
||||
"\u001b[32mUser> hello world, write me a 2 sentence poem about the moon\u001b[0m\n",
|
||||
"\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m cres\u001b[0m\u001b[33mcent\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m midnight\u001b[0m\u001b[33m sky\u001b[0m\u001b[33m,\n",
|
||||
"\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m that\u001b[0m\u001b[33m whispers\u001b[0m\u001b[33m,\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mI\u001b[0m\u001b[33m'm\u001b[0m\u001b[33m passing\u001b[0m\u001b[33m by\u001b[0m\u001b[33m.\"\u001b[0m\u001b[97m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
"# Initiate the response generation process\n",
|
||||
"asyncio.run(get_llama_response())"
|
||||
"\n",
|
||||
"# Run this function directly in a Jupyter Notebook cell with `await`\n",
|
||||
"await get_llama_response(use_local=False)\n",
|
||||
"# To run it in a python file, use this line instead\n",
|
||||
"# asyncio.run(get_llama_response(use_local=False))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "56aa9a09",
|
||||
"id": "5c433511-9321-4718-ab7f-e21cf6b5ca79",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Complete code\n",
|
||||
"Summing it up, here's the complete code for local-cloud model implementation with Llama Stack:\n"
|
||||
"#### 5. Run with Local Model\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d9fd74ff",
|
||||
"execution_count": 8,
|
||||
"id": "02eacfaf-c7f1-494b-ac28-129d2a0258e3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[33mUsing local client.\u001b[0m\n",
|
||||
"\u001b[32mUser> hello world, write me a 2 sentence poem about the moon\u001b[0m\n",
|
||||
"\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m cres\u001b[0m\u001b[33mcent\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m midnight\u001b[0m\u001b[33m sky\u001b[0m\u001b[33m,\n",
|
||||
"\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m that\u001b[0m\u001b[33m whispers\u001b[0m\u001b[33m,\u001b[0m\u001b[33m \"\u001b[0m\u001b[33mI\u001b[0m\u001b[33m'm\u001b[0m\u001b[33m passing\u001b[0m\u001b[33m by\u001b[0m\u001b[33m.\"\u001b[0m\u001b[97m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"import httpx\n",
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.lib.inference.event_logger import EventLogger\n",
|
||||
"from llama_stack_client.types import UserMessage\n",
|
||||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
"local_client = LlamaStackClient(base_url='http://{HOST}:{LOCAL_PORT}')\n",
|
||||
"cloud_client = LlamaStackClient(base_url='http://{HOST}:{CLOUD_PORT}')\n",
|
||||
"\n",
|
||||
"async def select_client() -> LlamaStackClient:\n",
|
||||
" try:\n",
|
||||
" async with httpx.AsyncClient() as http_client:\n",
|
||||
" response = await http_client.get(f'{local_client.base_url}/health')\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" cprint('Using local client.', 'yellow')\n",
|
||||
" return local_client\n",
|
||||
" except httpx.RequestError:\n",
|
||||
" pass\n",
|
||||
" cprint('Local client unavailable. Switching to cloud client.', 'yellow')\n",
|
||||
" return cloud_client\n",
|
||||
"\n",
|
||||
"async def get_llama_response(stream: bool = True):\n",
|
||||
" client = await select_client()\n",
|
||||
" message = UserMessage(\n",
|
||||
" content='hello world, write me a 2 sentence poem about the moon', role='user'\n",
|
||||
" )\n",
|
||||
" cprint(f'User> {message.content}', 'green')\n",
|
||||
"\n",
|
||||
" response = client.inference.chat_completion(\n",
|
||||
" messages=[message],\n",
|
||||
" model='Llama3.2-11B-Vision-Instruct',\n",
|
||||
" stream=stream,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" if not stream:\n",
|
||||
" cprint(f'> Response: {response}', 'cyan')\n",
|
||||
" else:\n",
|
||||
" async for log in EventLogger().log(response):\n",
|
||||
" log.print()\n",
|
||||
"\n",
|
||||
"asyncio.run(get_llama_response())"
|
||||
"await get_llama_response(use_local=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -226,8 +234,22 @@
|
|||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
"\n",
|
||||
"Prompt engineering is using natural language to produce a desired response from a large language model (LLM).\n",
|
||||
"\n",
|
||||
"This interactive guide covers prompt engineering & best practices with Llama 3.1 and Llama Stack.\n",
|
||||
"This interactive guide covers prompt engineering & best practices with Llama 3.2 and Llama Stack.\n",
|
||||
"\n",
|
||||
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)."
|
||||
]
|
||||
|
@ -41,7 +41,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"id": "df35d1e2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -62,14 +62,14 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "c2a0e359",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"\n",
|
||||
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')"
|
||||
"client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -84,7 +84,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"id": "da140b33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -148,7 +148,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 4,
|
||||
"id": "8b321089",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -170,10 +170,18 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 5,
|
||||
"id": "4ac1ac3e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: That's Llama!\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
|
@ -191,16 +199,24 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 6,
|
||||
"id": "524189bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[36m> Response: That's Llama!\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"from llama_stack_client.types import CompletionMessage, UserMessage\n",
|
||||
"from termcolor import cprint\n",
|
||||
"\n",
|
||||
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
|
||||
"client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
|
||||
"\n",
|
||||
"response = client.inference.chat_completion(\n",
|
||||
" messages=[\n",
|
||||
|
@ -251,17 +267,25 @@
|
|||
"\n",
|
||||
"The next one will be a guide on how to chat with images, continue to the notebook [here](./02_Image_Chat101.ipynb). Happy learning!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cce1f624",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue