forked from phoenix-oss/llama-stack-mirror
fix: Default to port 8321 everywhere (#1734)
As titled: replaced every remaining instance of port 5001 with the new default, 8321.
This commit is contained in:
parent
581e8ae562
commit
127bac6869
56 changed files with 2352 additions and 2305 deletions
@@ -1,392 +1,393 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c1e7571c",
   "metadata": {},
   "source": [
    "# Llama Stack Inference Guide\n",
    "\n",
    "This document provides instructions on how to use Llama Stack's `chat_completion` function for generating text using the `Llama3.1-8B-Instruct` model.\n",
    "\n",
    "Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
    "\n",
    "\n",
    "### Table of Contents\n",
    "1. [Quickstart](#quickstart)\n",
    "2. [Building Effective Prompts](#building-effective-prompts)\n",
    "3. [Conversation Loop](#conversation-loop)\n",
    "4. [Conversation History](#conversation-history)\n",
    "5. [Streaming Responses](#streaming-responses)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "414301dc",
   "metadata": {},
   "source": [
    "## Quickstart\n",
    "\n",
    "This section walks through each step to set up and make a simple text generation request.\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25b97dfe",
   "metadata": {},
   "source": [
    "### 0. Configuration\n",
    "Set up your connection parameters:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "38a39e44",
   "metadata": {},
   "outputs": [],
   "source": [
    "HOST = \"localhost\"  # Replace with your host\n",
    "PORT = 8321         # Replace with your port\n",
    "MODEL_NAME = 'meta-llama/Llama-3.2-3B-Instruct'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7dacaa2d-94e9-42e9-82a0-73522dfc7010",
   "metadata": {},
   "source": [
    "### 1. Set Up the Client\n",
    "\n",
    "Begin by importing the necessary components from Llama Stack’s client library:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7a573752",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_stack_client import LlamaStackClient\n",
    "\n",
    "client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')"
   ]
  },
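  {
   "cell_type": "markdown",
   "id": "added-model-check-md",
   "metadata": {},
   "source": [
    "*(Optional, illustrative addition.)* Before sending any requests, it can help to confirm that the client can reach the server and that your chosen model is registered. The sketch below assumes the server configured above is running and that your `llama_stack_client` version exposes `client.models.list()`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-model-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Illustrative sanity check (assumes the server configured above is reachable).\n",
    "# List the models registered with the Llama Stack server and verify that\n",
    "# MODEL_NAME is among them before calling chat_completion.\n",
    "registered = [model.identifier for model in client.models.list()]\n",
    "print(registered)\n",
    "assert MODEL_NAME in registered, f\"{MODEL_NAME} is not registered on this server\""
   ]
  },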
  {
   "cell_type": "markdown",
   "id": "86366383",
   "metadata": {},
   "source": [
    "### 2. Create a Chat Completion Request\n",
    "\n",
    "Use the `chat_completion` function to define the conversation context. Each message you include should have a specific role and content:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "77c29dba",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here is a two-sentence poem about a llama:\n",
      "\n",
      "With soft fur and gentle eyes, the llama roams free,\n",
      "A majestic creature, wild and carefree.\n"
     ]
    }
   ],
   "source": [
    "response = client.inference.chat_completion(\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
    "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
    "    ],\n",
    "    model_id=MODEL_NAME,\n",
    ")\n",
    "\n",
    "print(response.completion_message.content)"
   ]
  },
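  {
   "cell_type": "markdown",
   "id": "added-response-fields-md",
   "metadata": {},
   "source": [
    "*(Illustrative addition.)* The returned object carries the generated message under `completion_message`. A quick way to see what came back, assuming the `role`, `stop_reason`, and `content` fields used throughout this guide:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-response-fields",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the structure of the response from the previous cell.\n",
    "# These field names are assumptions based on the response shape used in this guide.\n",
    "print(\"role:\", response.completion_message.role)\n",
    "print(\"stop_reason:\", response.completion_message.stop_reason)\n",
    "print(\"content:\", response.completion_message.content)"
   ]
  },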
  {
   "cell_type": "markdown",
   "id": "e5f16949",
   "metadata": {},
   "source": [
    "## Building Effective Prompts\n",
    "\n",
    "Effective prompt creation (often called 'prompt engineering') is essential for quality responses. Here are best practices for structuring your prompts to get the most out of the Llama Stack model:\n",
    "\n",
    "### Sample Prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5c6812da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\"O, fair llama, with thy gentle eyes so bright,\n",
      "In Andean hills, thou dost enthrall with soft delight.\"\n"
     ]
    }
   ],
   "source": [
    "response = client.inference.chat_completion(\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": \"You are Shakespeare.\"},\n",
    "        {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"}\n",
    "    ],\n",
    "    model_id=MODEL_NAME,\n",
    ")\n",
    "print(response.completion_message.content)"
   ]
  },
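  {
   "cell_type": "markdown",
   "id": "added-structured-prompt-md",
   "metadata": {},
   "source": [
    "*(Illustrative addition.)* The same call also works with a more explicit prompt: a minimal sketch below puts the persona, output format, and constraints in the system message and keeps only the task in the user message. The prompt wording here is an example, not a prescribed template."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-structured-prompt",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch of a more structured prompt: persona + format + constraints in the\n",
    "# system message, task in the user message. Same chat_completion call as above.\n",
    "structured_system_prompt = (\n",
    "    \"You are a concise technical writer. \"\n",
    "    \"Answer in exactly two sentences and avoid flowery language.\"\n",
    ")\n",
    "\n",
    "response = client.inference.chat_completion(\n",
    "    messages=[\n",
    "        {\"role\": \"system\", \"content\": structured_system_prompt},\n",
    "        {\"role\": \"user\", \"content\": \"Explain why llamas are used as pack animals.\"}\n",
    "    ],\n",
    "    model_id=MODEL_NAME,\n",
    ")\n",
    "print(response.completion_message.content)"
   ]
  },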
  {
   "cell_type": "markdown",
   "id": "c8690ef0",
   "metadata": {},
   "source": [
    "## Conversation Loop\n",
    "\n",
    "To create a continuous conversation loop, where users can input multiple messages in a session, use the following structure. This example runs an asynchronous loop, ending when the user types 'exit,' 'quit,' or 'bye.'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "02211625",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m> Response: How can I assist you today?\u001b[0m\n",
      "\u001b[36m> Response: In South American hills, they roam and play,\n",
      "The llama's gentle eyes gaze out each day.\n",
      "Their soft fur coats in shades of white and gray,\n",
      "Inviting all to come and stay.\n",
      "\n",
      "With ears that listen, ears so fine,\n",
      "They hear the whispers of the Andean mine.\n",
      "Their footsteps quiet on the mountain slope,\n",
      "As they graze on grasses, a peaceful hope.\n",
      "\n",
      "In Incas' time, they were revered as friends,\n",
      "Their packs they bore, until the very end.\n",
      "The Spanish came, with guns and strife,\n",
      "But llamas stood firm, for life.\n",
      "\n",
      "Now, they roam free, in fields so wide,\n",
      "A symbol of resilience, side by side.\n",
      "With people's lives, a bond so strong,\n",
      "Together they thrive, all day long.\n",
      "\n",
      "Their soft hums echo through the air,\n",
      "As they wander, without a care.\n",
      "In their gentle hearts, a wisdom lies,\n",
      "A testament to the Andean skies.\n",
      "\n",
      "So here they'll stay, in this land of old,\n",
      "The llama's spirit, forever to hold.\u001b[0m\n",
      "\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "import asyncio\n",
    "from llama_stack_client import LlamaStackClient\n",
    "from termcolor import cprint\n",
    "\n",
    "client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
    "\n",
    "async def chat_loop():\n",
    "    while True:\n",
    "        user_input = input('User> ')\n",
    "        if user_input.lower() in ['exit', 'quit', 'bye']:\n",
    "            cprint('Ending conversation. Goodbye!', 'yellow')\n",
    "            break\n",
    "\n",
    "        message = {\"role\": \"user\", \"content\": user_input}\n",
    "        response = client.inference.chat_completion(\n",
    "            messages=[message],\n",
    "            model_id=MODEL_NAME\n",
    "        )\n",
    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
    "\n",
    "# Run the chat loop in a Jupyter Notebook cell using await\n",
    "await chat_loop()\n",
    "# To run it in a python file, use this line instead\n",
    "# asyncio.run(chat_loop())\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8cf0d555",
   "metadata": {},
   "source": [
    "## Conversation History\n",
    "\n",
    "Maintaining a conversation history allows the model to retain context from previous interactions. Use a list to accumulate messages, enabling continuity throughout the chat session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9496f75c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[36m> Response: How can I help you today?\u001b[0m\n",
      "\u001b[36m> Response: Here's a little poem about llamas:\n",
      "\n",
      "In Andean highlands, they roam and play,\n",
      "Their soft fur shining in the sunny day.\n",
      "With ears so long and eyes so bright,\n",
      "They watch with gentle curiosity, taking flight.\n",
      "\n",
      "Their llama voices hum, a soothing sound,\n",
      "As they wander through the mountains all around.\n",
      "Their padded feet barely touch the ground,\n",
      "As they move with ease, without a single bound.\n",
      "\n",
      "In packs or alone, they make their way,\n",
      "Carrying burdens, come what may.\n",
      "Their gentle spirit, a sight to see,\n",
      "A symbol of peace, for you and me.\n",
      "\n",
      "With llamas calm, our souls take flight,\n",
      "In their presence, all is right.\n",
      "So let us cherish these gentle friends,\n",
      "And honor their beauty that never ends.\u001b[0m\n",
      "\u001b[33mEnding conversation. Goodbye!\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "async def chat_loop():\n",
    "    conversation_history = []\n",
    "    while True:\n",
    "        user_input = input('User> ')\n",
    "        if user_input.lower() in ['exit', 'quit', 'bye']:\n",
    "            cprint('Ending conversation. Goodbye!', 'yellow')\n",
    "            break\n",
    "\n",
    "        user_message = {\"role\": \"user\", \"content\": user_input}\n",
    "        conversation_history.append(user_message)\n",
    "\n",
    "        response = client.inference.chat_completion(\n",
    "            messages=conversation_history,\n",
    "            model_id=MODEL_NAME,\n",
    "        )\n",
    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
    "\n",
    "        # Append the assistant turn with the fields returned by the API,\n",
    "        # so the next request sees the full back-and-forth\n",
    "        assistant_message = {\n",
    "            \"role\": \"assistant\",\n",
    "            \"content\": response.completion_message.content,\n",
    "            \"stop_reason\": response.completion_message.stop_reason,\n",
    "        }\n",
    "        conversation_history.append(assistant_message)\n",
    "\n",
    "# Use `await` in the Jupyter Notebook cell to call the function\n",
    "await chat_loop()\n",
    "# To run it in a python file, use this line instead\n",
    "# asyncio.run(chat_loop())\n"
   ]
  },
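  {
   "cell_type": "markdown",
   "id": "added-history-inspect-md",
   "metadata": {},
   "source": [
    "*(Illustrative addition.)* Because `conversation_history` is a plain Python list of message dictionaries, you can print it to see exactly what context is sent on each turn. The loop above keeps the list local, so return it from `chat_loop` (or build one by hand, as in this sketch) if you want to inspect a real session:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "added-history-inspect",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Purely illustrative helper: pretty-print a message history list.\n",
    "def print_history(history):\n",
    "    for i, msg in enumerate(history):\n",
    "        cprint(f\"[{i}] {msg['role']}: {msg['content']}\", 'magenta')\n",
    "\n",
    "# Example with a hand-built history; a real one comes from the loop above.\n",
    "print_history([\n",
    "    {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
    "    {\"role\": \"assistant\", \"content\": \"With soft fur and gentle eyes, the llama roams free.\"},\n",
    "])"
   ]
  },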
  {
   "cell_type": "markdown",
   "id": "03fcf5e0",
   "metadata": {},
   "source": [
    "## Streaming Responses\n",
    "\n",
    "Llama Stack offers a `stream` parameter in the `chat_completion` function, which allows partial responses to be returned progressively as they are generated. This can enhance user experience by providing immediate feedback without waiting for the entire response to be processed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d119026e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32mUser> Write me a 3 sentence poem about llama\u001b[0m\n",
      "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m sentence\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m a\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m:\n",
      "\n",
      "\u001b[0m\u001b[33mWith\u001b[0m\u001b[33m soft\u001b[0m\u001b[33m and\u001b[0m\u001b[33m fuzzy\u001b[0m\u001b[33m fur\u001b[0m\u001b[33m so\u001b[0m\u001b[33m bright\u001b[0m\u001b[33m,\n",
      "\u001b[0m\u001b[33mThe\u001b[0m\u001b[33m llama\u001b[0m\u001b[33m ro\u001b[0m\u001b[33mams\u001b[0m\u001b[33m through\u001b[0m\u001b[33m the\u001b[0m\u001b[33m And\u001b[0m\u001b[33mean\u001b[0m\u001b[33m light\u001b[0m\u001b[33m,\n",
      "\u001b[0m\u001b[33mA\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m giant\u001b[0m\u001b[33m,\u001b[0m\u001b[33m a\u001b[0m\u001b[33m w\u001b[0m\u001b[33mondrous\u001b[0m\u001b[33m sight\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "from llama_stack_client.lib.inference.event_logger import EventLogger\n",
    "\n",
    "async def run_main(stream: bool = True):\n",
    "    client = LlamaStackClient(base_url=f'http://{HOST}:{PORT}')\n",
    "\n",
    "    message = {\n",
    "        \"role\": \"user\",\n",
    "        \"content\": 'Write me a 3 sentence poem about llama'\n",
    "    }\n",
    "    cprint(f'User> {message[\"content\"]}', 'green')\n",
    "\n",
    "    response = client.inference.chat_completion(\n",
    "        messages=[message],\n",
    "        model_id=MODEL_NAME,\n",
    "        stream=stream,\n",
    "    )\n",
    "\n",
    "    if not stream:\n",
    "        cprint(f'> Response: {response.completion_message.content}', 'cyan')\n",
    "    else:\n",
    "        for log in EventLogger().log(response):\n",
    "            log.print()\n",
    "\n",
    "# In a Jupyter Notebook cell, use `await` to call the function\n",
    "await run_main()\n",
    "# To run it in a python file, use this line instead\n",
    "# asyncio.run(run_main())\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}