Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-30 07:39:38 +00:00)

Commit b556cd91fd (parent d0baf24999): standardized port and also included pre-req for all notebooks

8 changed files with 177 additions and 42 deletions
@@ -7,7 +7,10 @@
"source": [
"# Llama Stack Inference Guide\n",
"\n",
"This document provides instructions on how to use Llama Stack's `chat_completion` function for generating text using the `Llama3.2-11B-Vision-Instruct` model. Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/).\n",
"This document provides instructions on how to use Llama Stack's `chat_completion` function for generating text using the `Llama3.2-11B-Vision-Instruct` model. \n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
"\n",
"\n",
"### Table of Contents\n",
"1. [Quickstart](#quickstart)\n",
@@ -25,7 +28,36 @@
"## Quickstart\n",
"\n",
"This section walks through each step to set up and make a simple text generation request.\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "25b97dfe",
"metadata": {},
"source": [
"### 0. Configuration\n",
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38a39e44",
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1d097ab",
"metadata": {},
"outputs": [],
"source": [
"### 1. Set Up the Client\n",
"\n",
"Begin by importing the necessary components from Llama Stack’s client library:"
@@ -41,7 +73,7 @@
"from llama_stack_client import LlamaStackClient\n",
"from llama_stack_client.types import SystemMessage, UserMessage\n",
"\n",
"client = LlamaStackClient(base_url='http://localhost:5000')"
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')"
]
},
{
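
For reference, a minimal sketch of how the new configuration cell and the client setup are meant to fit together. The model name comes from the guide above; the f-string is an assumption about intent, since Python only substitutes {HOST} and {PORT} inside an f-string, while the cell in this hunk passes a plain string literal:

    from llama_stack_client import LlamaStackClient
    from llama_stack_client.types import UserMessage

    HOST = "localhost"  # replace with your host
    PORT = 5001         # replace with your port

    # f-string so {HOST} and {PORT} are actually substituted into the URL
    client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")

    response = client.inference.chat_completion(
        messages=[
            UserMessage(
                content="hello world, write me a 2 sentence poem about the moon",
                role="user",
            )
        ],
        model="Llama3.2-11B-Vision-Instruct",  # model used throughout this notebook
        stream=False,
    )
    print(response)
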
@@ -129,7 +161,7 @@
"from llama_stack_client.types import UserMessage\n",
"from termcolor import cprint\n",
"\n",
"client = LlamaStackClient(base_url='http://localhost:5000')\n",
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
"\n",
"async def chat_loop():\n",
" while True:\n",
@@ -214,7 +246,7 @@
"from termcolor import cprint\n",
"\n",
"async def run_main(stream: bool = True):\n",
" client = LlamaStackClient(base_url='http://localhost:5000')\n",
" client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
"\n",
" message = UserMessage(\n",
" content='hello world, write me a 2 sentence poem about the moon', role='user'\n",
@@ -241,7 +273,11 @@
]
}
],
"metadata": {},
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@@ -35,8 +35,8 @@
"from llama_stack_client import LlamaStackClient\n",
"\n",
"# Configure local and cloud clients\n",
"local_client = LlamaStackClient(base_url='http://localhost:5000')\n",
"cloud_client = LlamaStackClient(base_url='http://localhost:5001')"
"local_client = LlamaStackClient(base_url='http://{HOST}:{LOCAL_PORT}')\n",
"cloud_client = LlamaStackClient(base_url='http://{HOST}:{CLOUD_PORT}')"
]
},
{
@@ -157,8 +157,8 @@
"from llama_stack_client.types import UserMessage\n",
"from termcolor import cprint\n",
"\n",
"local_client = LlamaStackClient(base_url='http://localhost:5000')\n",
"cloud_client = LlamaStackClient(base_url='http://localhost:5001')\n",
"local_client = LlamaStackClient(base_url='http://{HOST}:{LOCAL_PORT}')\n",
"cloud_client = LlamaStackClient(base_url='http://{HOST}:{CLOUD_PORT}')\n",
"\n",
"async def select_client() -> LlamaStackClient:\n",
" try:\n",
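
The `select_client` helper is truncated in the hunk above. A sketch of a local-first fallback under stated assumptions: the ports are the ones from the old hardcoded URLs, and a cheap `models.list()` call stands in for the notebook's actual reachability check, which is not shown here:

    from llama_stack_client import LlamaStackClient

    HOST = "localhost"   # replace with your host
    LOCAL_PORT = 5000    # assumed local distribution port
    CLOUD_PORT = 5001    # assumed cloud distribution port

    local_client = LlamaStackClient(base_url=f"http://{HOST}:{LOCAL_PORT}")
    cloud_client = LlamaStackClient(base_url=f"http://{HOST}:{CLOUD_PORT}")

    async def select_client() -> LlamaStackClient:
        """Prefer the local server; fall back to the cloud client if it is unreachable."""
        try:
            local_client.models.list()  # any lightweight request works as a probe
            return local_client
        except Exception:
            return cloud_client
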
@@ -5,13 +5,13 @@
"id": "cd96f85a",
"metadata": {},
"source": [
"<a href=\"https://colab.research.google.com/github/meta-llama/llama-recipes/blob/main/recipes/quickstart/Prompt_Engineering_with_Llama_3.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
"\n",
"# Prompt Engineering with Llama Stack\n",
"\n",
"Prompt engineering is using natural language to produce a desired response from a large language model (LLM).\n",
"\n",
"This interactive guide covers prompt engineering & best practices with Llama 3.1 and Llama Stack"
"This interactive guide covers prompt engineering & best practices with Llama 3.1 and Llama Stack.\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)."
]
},
{
@@ -30,6 +30,26 @@
"### Implementation"
]
},
{
"cell_type": "markdown",
"id": "e065af43",
"metadata": {},
"source": [
"### 0. Configuration\n",
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df35d1e2",
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "markdown",
"id": "a7a25a7e",
@@ -49,7 +69,7 @@
"source": [
"from llama_stack_client import LlamaStackClient\n",
"\n",
"client = LlamaStackClient(base_url='http://localhost:5000')"
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')"
]
},
{
@@ -180,7 +200,7 @@
"from llama_stack_client.types import CompletionMessage, UserMessage\n",
"from termcolor import cprint\n",
"\n",
"client = LlamaStackClient(base_url='http://localhost:5000')\n",
"client = LlamaStackClient(base_url='http://{HOST}:{PORT}')\n",
"\n",
"response = client.inference.chat_completion(\n",
" messages=[\n",
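
The hunk above shows only the start of a few-shot `chat_completion` call. A minimal sketch of how such a prompt can be assembled with alternating user and assistant turns; the example prompts, the model name, and the `stop_reason` value are assumptions, not taken from this diff, and an f-string is used so the host and port are substituted:

    from llama_stack_client import LlamaStackClient
    from llama_stack_client.types import CompletionMessage, UserMessage
    from termcolor import cprint

    HOST = "localhost"  # replace with your host
    PORT = 5001         # replace with your port
    client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")

    # Few-shot prompt: prior user/assistant turns steer the model toward short answers.
    response = client.inference.chat_completion(
        messages=[
            UserMessage(content="Classify the sentiment: 'I loved this movie!'", role="user"),
            CompletionMessage(content="positive", role="assistant", stop_reason="end_of_turn"),
            UserMessage(content="Classify the sentiment: 'The food was terrible.'", role="user"),
        ],
        model="Llama3.1-8B-Instruct",  # assumed; use whichever model your distribution serves
        stream=False,
    )
    cprint(f"> Response: {response}", "cyan")
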
@@ -7,6 +7,8 @@
"source": [
"## Getting Started with LlamaStack Vision API\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
"\n",
"Let's import the necessary packages"
]
},
@@ -37,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "1d293479-9dde-4b68-94ab-d0c4c61ab08c",
"metadata": {},
"outputs": [],
@@ -65,33 +67,33 @@
"def encode_image_to_data_url(file_path: str) -> str:\n",
" \"\"\"\n",
" Encode an image file to a data URL.\n",
" \n",
"\n",
" Args:\n",
" file_path (str): Path to the image file\n",
" \n",
"\n",
" Returns:\n",
" str: Data URL string\n",
" \"\"\"\n",
" mime_type, _ = mimetypes.guess_type(file_path)\n",
" if mime_type is None:\n",
" raise ValueError(\"Could not determine MIME type of the file\")\n",
" \n",
"\n",
" with open(file_path, \"rb\") as image_file:\n",
" encoded_string = base64.b64encode(image_file.read()).decode(\"utf-8\")\n",
" \n",
"\n",
" return f\"data:{mime_type};base64,{encoded_string}\"\n",
"\n",
"async def process_image(client: LlamaStackClient, image_path: str, stream: bool = True):\n",
" \"\"\"\n",
" Process an image through the LlamaStack Vision API.\n",
" \n",
"\n",
" Args:\n",
" client (LlamaStackClient): Initialized client\n",
" image_path (str): Path to image file\n",
" stream (bool): Whether to stream the response\n",
" \"\"\"\n",
" data_url = encode_image_to_data_url(image_path)\n",
" \n",
"\n",
" message = UserMessage(\n",
" role=\"user\",\n",
" content=[\n",
@@ -99,14 +101,14 @@
" \"Describe what is in this image.\",\n",
" ],\n",
" )\n",
" \n",
"\n",
" cprint(f\"User> Sending image for analysis...\", \"green\")\n",
" response = client.inference.chat_completion(\n",
" messages=[message],\n",
" model=\"Llama3.2-11B-Vision-Instruct\",\n",
" stream=stream,\n",
" )\n",
" \n",
"\n",
" if not stream:\n",
" cprint(f\"> Response: {response}\", \"cyan\")\n",
" else:\n",
@@ -146,10 +148,10 @@
" client = LlamaStackClient(\n",
" base_url=f\"http://{HOST}:{PORT}\",\n",
" )\n",
" \n",
"\n",
" # Process image\n",
" await process_image(client, \"logo.png\")\n",
" \n",
"\n",
" # Query available models\n",
" models_response = client.models.list()\n",
" print(\"\\nAvailable Models:\")\n",
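
Because the JSON escaping and collapsed indentation above make the helper hard to read, here is the same `encode_image_to_data_url` logic from the hunks as plain Python, with a hypothetical call showing the data URL it produces ("logo.png" mirrors the file used by the notebook's main routine):

    import base64
    import mimetypes

    def encode_image_to_data_url(file_path: str) -> str:
        """Encode an image file as a data URL (same logic as the helper in the diff above)."""
        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type is None:
            raise ValueError("Could not determine MIME type of the file")
        with open(file_path, "rb") as image_file:
            encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
        return f"data:{mime_type};base64,{encoded_string}"

    data_url = encode_image_to_data_url("logo.png")  # hypothetical local image
    print(data_url[:40], "...")                      # e.g. "data:image/png;base64,iVBORw0K..."
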
@@ -4,7 +4,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Tool Calling"
"## Tool Calling\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)."
]
},
{
@@ -17,6 +19,23 @@
"3. Configuring tool prompts and safety settings"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "code",
"execution_count": 22,
@@ -206,13 +225,13 @@
"from datetime import datetime\n",
"class WeatherTool:\n",
" \"\"\"Example custom tool for weather information.\"\"\"\n",
" \n",
"\n",
" def get_name(self) -> str:\n",
" return \"get_weather\"\n",
" \n",
"\n",
" def get_description(self) -> str:\n",
" return \"Get weather information for a location\"\n",
" \n",
"\n",
" def get_params_definition(self) -> Dict[str, ToolParamDefinitionParam]:\n",
" return {\n",
" \"location\": ToolParamDefinitionParam(\n",
@@ -226,7 +245,7 @@
" required=False\n",
" )\n",
" }\n",
" \n",
"\n",
" async def run_impl(self, location: str, date: Optional[str] = None) -> Dict[str, Any]:\n",
" \"\"\"Simulate getting weather data (replace with actual API call).\"\"\"\n",
" # Mock implementation\n",
@@ -275,7 +294,7 @@
" output_shields=[],\n",
" enable_session_persistence=True\n",
" )\n",
" \n",
"\n",
" # Create the agent with the tool\n",
" weather_tool = WeatherTool()\n",
" agent = Agent(\n",
@@ -283,7 +302,7 @@
" agent_config=agent_config,\n",
" custom_tools=[weather_tool]\n",
" )\n",
" \n",
"\n",
" return agent\n",
"\n",
"# Example usage\n",
@@ -291,21 +310,21 @@
" client = LlamaStackClient(base_url=\"http://localhost:5001\")\n",
" agent = await create_weather_agent(client)\n",
" session_id = agent.create_session(\"weather-session\")\n",
" \n",
"\n",
" queries = [\n",
" \"What's the weather like in San Francisco?\",\n",
" \"Tell me the weather in Tokyo tomorrow\",\n",
" ]\n",
" \n",
"\n",
" for query in queries:\n",
" print(f\"\\nQuery: {query}\")\n",
" print(\"-\" * 50)\n",
" \n",
"\n",
" response = agent.create_turn(\n",
" messages=[{\"role\": \"user\", \"content\": query}],\n",
" session_id=session_id,\n",
" )\n",
" \n",
"\n",
" async for log in EventLogger().log(response):\n",
" log.print()\n",
"\n",

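
The WeatherTool hunks above cut off after the "# Mock implementation" comment in `run_impl`. A sketch of one plausible mock, mirroring the method signatures shown in the diff; the returned fields and values are illustrative, not the notebook's actual data, and `get_params_definition` is omitted because the diff already shows it:

    from datetime import datetime
    from typing import Any, Dict, Optional

    class WeatherTool:
        """Example custom tool for weather information (signatures mirror the diff above)."""

        def get_name(self) -> str:
            return "get_weather"

        def get_description(self) -> str:
            return "Get weather information for a location"

        async def run_impl(self, location: str, date: Optional[str] = None) -> Dict[str, Any]:
            """Simulate getting weather data (replace with an actual API call)."""
            # Mock implementation: return fixed values for any location/date
            return {
                "location": location,
                "date": date or datetime.now().strftime("%Y-%m-%d"),
                "temperature": 72,      # illustrative value
                "condition": "sunny",   # illustrative value
            }
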
@@ -23,11 +23,31 @@
"Prerequisites:\n",
"\n",
"Basic Python knowledge\n",
"A running instance of the Memory API server (we'll use localhost in this tutorial)\n",
"A running instance of the Memory API server (we'll use localhost in \n",
"this tutorial)\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html).\n",
"\n",
"Let's start by installing the required packages:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "code",
"execution_count": null,

@@ -11,7 +11,9 @@
"As outlined in our [Responsible Use Guide](https://www.llama.com/docs/how-to-guides/responsible-use-guide-resources/), LLM apps should deploy appropriate system level safeguards to mitigate safety and security risks of LLM system, similar to the following diagram:\n",
"\n",
"\n",
"To that goal, Llama Stack uses **Prompt Guard** and **Llama Guard 3** to secure our system. Here are the quick introduction about them."
"To that goal, Llama Stack uses **Prompt Guard** and **Llama Guard 3** to secure our system. Here are the quick introduction about them.\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)."
]
},
{
@@ -84,6 +86,23 @@
"After the server started, you can test safety example using the follow code:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -163,7 +182,7 @@
"\n",
"\n",
"async def safety_example():\n",
" client = SafetyClient(f\"http://localhost:5000\")\n",
" client = SafetyClient(f\"http://{HOST}:{PORT}\")\n",
"\n",
" for message in [\n",
" UserMessage(content=\"hello world, write me a 2 sentence poem about the moon\"),\n",

@@ -21,7 +21,9 @@
"- ability to run safety checks using the Llama Guard series of models\n",
"- ability to execute tools, including a code execution environment, and loop using the model's multi-step reasoning process\n",
"\n",
"All of these components are now offered by a single Llama Stack Distribution. Llama Stack defines and standardizes these components and many others that are needed to make building Generative AI applications smoother. Various implementations of these APIs are then assembled together via a **Llama Stack Distribution**."
"All of these components are now offered by a single Llama Stack Distribution. Llama Stack defines and standardizes these components and many others that are needed to make building Generative AI applications smoother. Various implementations of these APIs are then assembled together via a **Llama Stack Distribution**.\n",
"\n",
"Before you begin, please ensure Llama Stack is installed and set up by following the [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html)."
]
},
{
@@ -35,6 +37,23 @@
"In this tutorial, with the `Llama3.1-8B-Instruct` server running, we can use the following code to run a simple agent example:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up your connection parameters:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"HOST = \"localhost\" # Replace with your host\n",
"PORT = 5001 # Replace with your port"
]
},
{
"cell_type": "code",
"execution_count": 14,
@@ -99,7 +118,7 @@
"os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"YOUR_SEARCH_API_KEY\"\n",
"\n",
"async def agent_example():\n",
" client = LlamaStackClient(base_url=\"http://localhost:5000\")\n",
" client = LlamaStackClient(base_url=\"http://{HOST}:{PORT}\")\n",
" models_response = client.models.list()\n",
" for model in models_response:\n",
" if model.identifier.endswith(\"Instruct\"):\n",
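
The truncated `agent_example` above discovers which models the server offers before building the agent. A minimal sketch of that discovery step, again assuming an f-string so the host and port placeholders are substituted:

    from llama_stack_client import LlamaStackClient

    HOST = "localhost"  # replace with your host
    PORT = 5001         # replace with your port

    client = LlamaStackClient(base_url=f"http://{HOST}:{PORT}")

    # List the models served by the distribution and keep the instruct-tuned ones
    models = client.models.list()
    instruct_models = [m.identifier for m in models if m.identifier.endswith("Instruct")]
    print("Available Instruct models:", instruct_models)
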