Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-07 20:50:52 +00:00)

chore!: remove --image-type and --image-name from llama stack run

# What does this PR do?

## Test Plan

This commit is contained in:
parent 0751002bf3
commit ea30c24595

13 changed files with 4659 additions and 5171 deletions
.github/workflows/integration-auth-tests.yml (vendored, 2 changed lines)
@@ -86,7 +86,7 @@ jobs:
          # avoid line breaks in the server log, especially because we grep it below.
          export COLUMNS=1984
-         nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
+         nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
@@ -59,7 +59,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ramalama-stack-test/bin/activate
          uv pip list
-         nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+         nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
.github/workflows/test-external.yml (vendored, 2 changed lines)
@@ -59,7 +59,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ci-test/bin/activate
          uv pip list
-         nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+         nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
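Each of the workflows above launches the server with `nohup ... > server.log 2>&1 &` and then relies on a "Wait for Llama Stack server to be ready" step whose body is not part of this diff. A readiness probe of that kind can be sketched as below; this is a hypothetical example assuming the default port 8321 and the `/v1/health` endpoint used elsewhere in the repository, not the literal step these workflows run.

```bash
# Hypothetical readiness probe (not the exact step body from these workflows).
# Poll the health endpoint for up to 30 seconds before giving up.
for i in {1..30}; do
  if curl -sSf http://localhost:8321/v1/health > /dev/null; then
    echo "Llama Stack server is ready"
    exit 0
  fi
  sleep 1
done

echo "Server failed to become ready; last log lines:"
tail -n 50 server.log
exit 1
```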
@@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
 llama stack build --distro starter --image-type venv
-llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
+llama stack run ~/.llama/distributions/starter/starter-run.yaml
 ```

 ### Usage Example
@@ -322,20 +322,20 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
 llama stack run -h
 usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                        [--image-type {venv}] [--enable-ui]
-                       [config | template]
+                       [config | distro]

 Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.

 positional arguments:
-  config | template     Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)
+  config | distro       Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)

 options:
   -h, --help            show this help message and exit
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
-                        Name of the image to run. Defaults to the current environment (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
   --image-type {venv}
-                        Image Type used during the build. This should be venv. (default: None)
+                        [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
   --enable-ui           Start the UI server (default: False)
 ```

@@ -347,9 +347,6 @@ llama stack run tgi

 # Start using config file
 llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

-# Start using a venv
-llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
-
 ```

 ```
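With `--image-type` and `--image-name` deprecated, the documented replacement is to activate the environment yourself and then point `llama stack run` at a config file or a known distro name. A minimal sketch of that workflow, assuming a virtual environment at `~/.venvs/llama-stack` (the path is illustrative, not prescribed by the docs):

```bash
# Activate the environment that `llama stack build` populated (path is an example).
source ~/.venvs/llama-stack/bin/activate

# Start from a run configuration file...
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml

# ...or from a known distro name (see `llama stack list`), optionally overriding the port.
llama stack run starter --port 8321
```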
File diff suppressed because one or more lines are too long
@@ -1,366 +1,315 @@

[Notebook diff: the "Llama Stack - Building AI Applications" getting-started notebook. The markdown cells (install uv and Ollama, test inference with Ollama, start the server, run the RAG demo, Next Steps) and the RAG demo code cell are unchanged; the "2.1. Setup the Llama Stack Server" code cell is consolidated into a single source string, with its helper functions (`wait_for_server_to_start`, `kill_llama_stack_server`) carried over as-is. The functional change is the server launch command, which drops the deprecated flag:]

 # this command installs all the dependencies needed for the llama stack server with the ollama inference provider
 !uv run --with llama-stack llama stack build --distro starter --image-type venv

 def run_llama_stack_server_background():
     log_file = open("llama_stack_server.log", "w")
     process = subprocess.Popen(
-        f"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv",
+        f"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter",
         shell=True,
         stdout=log_file,
         stderr=log_file,
         text=True
     )

     print(f"Starting Llama Stack server with PID: {process.pid}")
     return process
@@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 ...
 Build Successful!
 You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
-You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
+You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
 ```

 3. **Set the ENV variables by exporting them to the terminal**:
@@ -106,7 +106,6 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 SAFETY_MODEL=$SAFETY_MODEL \
 OLLAMA_URL=$OLLAMA_URL \
 uv run --with llama-stack llama stack run starter \
-  --image-type venv \
   --port $LLAMA_STACK_PORT
 ```
 Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
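The exports referenced above feed directly into that command. A self-contained example with placeholder values (the model name is an example only; `OLLAMA_URL` and the default port 8321 come from the surrounding docs):

```bash
# Example values only -- substitute the safety model you actually pulled with `ollama run`.
export SAFETY_MODEL="llama-guard3:1b"        # placeholder model name
export OLLAMA_URL="http://localhost:11434"
export LLAMA_STACK_PORT=8321

SAFETY_MODEL=$SAFETY_MODEL \
OLLAMA_URL=$OLLAMA_URL \
uv run --with llama-stack llama stack run starter \
  --port $LLAMA_STACK_PORT
```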
@@ -444,12 +444,22 @@ def _run_stack_build_command_from_build_config(

         cprint("Build Successful!", color="green", file=sys.stderr)
         cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
-        cprint(
-            "You can run the new Llama Stack distro via: "
-            + colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
-            color="green",
-            file=sys.stderr,
-        )
+        if build_config.image_type == LlamaStackImageType.VENV:
+            cprint(
+                "You can run the new Llama Stack distro (after activating "
+                + colored(image_name, "cyan")
+                + ") via: "
+                + colored(f"llama stack run {run_config_file}", "blue"),
+                color="green",
+                file=sys.stderr,
+            )
+        elif build_config.image_type == LlamaStackImageType.CONTAINER:
+            cprint(
+                "You can run the container with: "
+                + colored(f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"),
+                color="green",
+                file=sys.stderr,
+            )
         return distro_path
     else:
         return _generate_run_config(build_config, build_dir, image_name)
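The two messages printed by that branch map onto the following shell invocations; `my-image` and the run-config path are placeholders standing in for the `image_name` and `run_config_file` values printed at build time:

```bash
# venv build: activate the environment named by the build, then run the generated config.
source my-image/bin/activate                                          # "my-image" is a placeholder for image_name
llama stack run ~/.llama/distributions/my-image/my-image-run.yaml     # placeholder run_config_file path

# container build: run the built image, mapping the default port and the ~/.llama state directory.
docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/my-image --port 8321
```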
@@ -55,12 +55,12 @@ class StackRun(Subcommand):
             "--image-name",
             type=str,
             default=None,
-            help="Name of the image to run. Defaults to the current environment",
+            help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
         )
         self.parser.add_argument(
             "--image-type",
             type=str,
-            help="Image Type used during the build. This can be only venv.",
+            help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
             choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
         )
         self.parser.add_argument(
@@ -106,11 +106,18 @@ class StackRun(Subcommand):
         import yaml

         from llama_stack.core.configure import parse_and_maybe_upgrade_config
-        from llama_stack.core.utils.exec import formulate_run_args, run_command
+
+        if args.image_type or args.image_name:
+            self.parser.error(
+                "The --image-type and --image-name flags are no longer supported.\n\n"
+                "Please activate your virtual environment manually before running `llama stack run`.\n\n"
+                "For example:\n"
+                "  source /path/to/venv/bin/activate\n"
+                "  llama stack run <config>\n"
+            )

         if args.enable_ui:
             self._start_ui_development_server(args.port)
-        image_type, image_name = args.image_type, args.image_name

         if args.config:
             try:
@@ -122,10 +129,6 @@ class StackRun(Subcommand):
         else:
             config_file = None

-        # Check if config is required based on image type
-        if image_type == ImageType.VENV.value and not config_file:
-            self.parser.error("Config file is required for venv environment")
-
         if config_file:
             logger.info(f"Using run configuration: {config_file}")
@@ -140,23 +143,8 @@ class StackRun(Subcommand):
                 os.makedirs(str(config.external_providers_dir), exist_ok=True)
             except AttributeError as e:
                 self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
-        else:
-            config = None
-
-        # If neither image type nor image name is provided, assume the server should be run directly
-        # using the current environment packages.
-        if not image_type and not image_name:
-            logger.info("No image type or image name provided. Assuming environment packages.")
-            self._uvicorn_run(config_file, args)
-        else:
-            run_args = formulate_run_args(image_type, image_name)
-
-            run_args.extend([str(args.port)])
-
-            if config_file:
-                run_args.extend(["--config", str(config_file)])
-
-            run_command(run_args)
+
+        self._uvicorn_run(config_file, args)

     def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
         if not config_file:
@@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
   echo "Llama Stack Server is already running, skipping start"
 else
   echo "=== Starting Llama Stack Server ==="
-  nohup llama stack run ci-tests --image-type venv > server.log 2>&1 &
+  nohup llama stack run ci-tests > server.log 2>&1 &

   echo "Waiting for Llama Stack Server to start..."
   for i in {1..30}; do