mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-08 04:54:38 +00:00
chore!: remove --image-type and --image-name from llama stack run
# What does this PR do? ## Test Plan
This commit is contained in:
parent
0751002bf3
commit
ea30c24595
13 changed files with 4659 additions and 5171 deletions
2
.github/workflows/integration-auth-tests.yml
vendored
2
.github/workflows/integration-auth-tests.yml
vendored
|
@ -86,7 +86,7 @@ jobs:
|
||||||
|
|
||||||
# avoid line breaks in the server log, especially because we grep it below.
|
# avoid line breaks in the server log, especially because we grep it below.
|
||||||
export COLUMNS=1984
|
export COLUMNS=1984
|
||||||
nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
|
nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &
|
||||||
|
|
||||||
- name: Wait for Llama Stack server to be ready
|
- name: Wait for Llama Stack server to be ready
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -59,7 +59,7 @@ jobs:
|
||||||
# Use the virtual environment created by the build step (name comes from build config)
|
# Use the virtual environment created by the build step (name comes from build config)
|
||||||
source ramalama-stack-test/bin/activate
|
source ramalama-stack-test/bin/activate
|
||||||
uv pip list
|
uv pip list
|
||||||
nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
|
nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &
|
||||||
|
|
||||||
- name: Wait for Llama Stack server to be ready
|
- name: Wait for Llama Stack server to be ready
|
||||||
run: |
|
run: |
|
||||||
|
|
2
.github/workflows/test-external.yml
vendored
2
.github/workflows/test-external.yml
vendored
|
@ -59,7 +59,7 @@ jobs:
|
||||||
# Use the virtual environment created by the build step (name comes from build config)
|
# Use the virtual environment created by the build step (name comes from build config)
|
||||||
source ci-test/bin/activate
|
source ci-test/bin/activate
|
||||||
uv pip list
|
uv pip list
|
||||||
nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
|
nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &
|
||||||
|
|
||||||
- name: Wait for Llama Stack server to be ready
|
- name: Wait for Llama Stack server to be ready
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
llama stack build --distro starter --image-type venv
|
llama stack build --distro starter --image-type venv
|
||||||
llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
|
llama stack run ~/.llama/distributions/starter/starter-run.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Usage Example
|
### Usage Example
|
||||||
|
|
|
@ -322,20 +322,20 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
|
||||||
llama stack run -h
|
llama stack run -h
|
||||||
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
|
usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
|
||||||
[--image-type {venv}] [--enable-ui]
|
[--image-type {venv}] [--enable-ui]
|
||||||
[config | template]
|
[config | distro]
|
||||||
|
|
||||||
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution.
|
||||||
|
|
||||||
positional arguments:
|
positional arguments:
|
||||||
config | template Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None)
|
config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None)
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
|
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
|
||||||
--image-name IMAGE_NAME
|
--image-name IMAGE_NAME
|
||||||
Name of the image to run. Defaults to the current environment (default: None)
|
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
|
||||||
--image-type {venv}
|
--image-type {venv}
|
||||||
Image Type used during the build. This should be venv. (default: None)
|
[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None)
|
||||||
--enable-ui Start the UI server (default: False)
|
--enable-ui Start the UI server (default: False)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -347,9 +347,6 @@ llama stack run tgi
|
||||||
|
|
||||||
# Start using config file
|
# Start using config file
|
||||||
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
||||||
|
|
||||||
# Start using a venv
|
|
||||||
llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml
|
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
|
@ -85,88 +85,8 @@
|
||||||
"id": "J2kGed0R5PSf",
|
"id": "J2kGed0R5PSf",
|
||||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
"source": "import os\nimport subprocess\nimport time\n\n!pip install uv\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server with the together inference provider\n!uv run --with llama-stack llama stack build --distro together --image-type venv\n\ndef run_llama_stack_server_background():\n log_file = open(\"llama_stack_server.log\", \"w\")\n process = subprocess.Popen(\n \"uv run --with llama-stack llama stack run together\",\n shell=True,\n stdout=log_file,\n stderr=log_file,\n text=True\n )\n\n print(f\"Starting Llama Stack server with PID: {process.pid}\")\n return process\n\ndef wait_for_server_to_start():\n import requests\n from requests.exceptions import ConnectionError\n import time\n\n url = \"http://0.0.0.0:8321/v1/health\"\n max_retries = 30\n retry_interval = 1\n\n print(\"Waiting for server to start\", end=\"\")\n for _ in range(max_retries):\n try:\n response = requests.get(url)\n if response.status_code == 200:\n print(\"\\nServer is ready!\")\n return True\n except ConnectionError:\n print(\".\", end=\"\", flush=True)\n time.sleep(retry_interval)\n\n print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n # Kill any existing llama stack server processes\n os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/stack/lib/python3.10/site-packages (0.5.29)\n",
|
|
||||||
"Environment '/Users/hjshah/git/llama-stack/.venv' already exists, re-using it.\n",
|
|
||||||
"Virtual environment /Users/hjshah/git/llama-stack/.venv is already active\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 314ms\u001b[0m\u001b[0m\n",
|
|
||||||
"Installing pip dependencies\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[2mResolved \u001b[1m125 packages\u001b[0m \u001b[2min 646ms\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
"\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 404ms\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 129ms\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
" \u001b[31m-\u001b[39m \u001b[1mnumpy\u001b[0m\u001b[2m==2.2.3\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mnumpy\u001b[0m\u001b[2m==1.26.4\u001b[0m\n",
|
|
||||||
"sentence-transformers --no-deps\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 54ms\u001b[0m\u001b[0m\n",
|
|
||||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 10ms\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[32mBuild Successful!\u001b[0m\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"!pip install uv\n",
|
|
||||||
"\n",
|
|
||||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
|
|
||||||
"!uv run --with llama-stack llama stack build --distro together --image-type venv\n",
|
|
||||||
"\n",
|
|
||||||
"def run_llama_stack_server_background():\n",
|
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
|
||||||
" process = subprocess.Popen(\n",
|
|
||||||
" \"uv run --with llama-stack llama stack run together --image-type venv\",\n",
|
|
||||||
" shell=True,\n",
|
|
||||||
" stdout=log_file,\n",
|
|
||||||
" stderr=log_file,\n",
|
|
||||||
" text=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
|
||||||
" return process\n",
|
|
||||||
"\n",
|
|
||||||
"def wait_for_server_to_start():\n",
|
|
||||||
" import requests\n",
|
|
||||||
" from requests.exceptions import ConnectionError\n",
|
|
||||||
" import time\n",
|
|
||||||
"\n",
|
|
||||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
|
||||||
" max_retries = 30\n",
|
|
||||||
" retry_interval = 1\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
|
||||||
" for _ in range(max_retries):\n",
|
|
||||||
" try:\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" print(\"\\nServer is ready!\")\n",
|
|
||||||
" return True\n",
|
|
||||||
" except ConnectionError:\n",
|
|
||||||
" print(\".\", end=\"\", flush=True)\n",
|
|
||||||
" time.sleep(retry_interval)\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
|
||||||
" return False\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# use this helper if needed to kill the server\n",
|
|
||||||
"def kill_llama_stack_server():\n",
|
|
||||||
" # Kill any existing llama stack server processes\n",
|
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|
|
@ -88,195 +88,8 @@
|
||||||
"id": "J2kGed0R5PSf",
|
"id": "J2kGed0R5PSf",
|
||||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
"source": "import os\nimport subprocess\nimport time\n\n!uv pip install requests\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server\n!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n\ndef run_llama_stack_server_background():\n log_file = open(\"llama_stack_server.log\", \"w\")\n process = subprocess.Popen(\n f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n shell=True,\n stdout=log_file,\n stderr=log_file,\n text=True\n )\n\n print(f\"Starting Llama Stack server with PID: {process.pid}\")\n return process\n\ndef wait_for_server_to_start():\n import requests\n from requests.exceptions import ConnectionError\n import time\n\n url = \"http://0.0.0.0:8321/v1/health\"\n max_retries = 30\n retry_interval = 1\n\n print(\"Waiting for server to start\", end=\"\")\n for _ in range(max_retries):\n try:\n response = requests.get(url)\n if response.status_code == 200:\n print(\"\\nServer is ready!\")\n return True\n except ConnectionError:\n print(\".\", end=\"\", flush=True)\n time.sleep(retry_interval)\n\n print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n # Kill any existing llama stack server processes\n os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/l4/lib/python3.10/site-packages (0.6.12)\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/l4\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 83ms\u001b[0m\u001b[0m\n",
|
|
||||||
"Environment '/Users/erichuang/projects/internal-llama-stack/.venv' already exists, re-using it.\n",
|
|
||||||
"Virtual environment /Users/erichuang/projects/internal-llama-stack/.venv is already active\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 387ms\u001b[0m\u001b[0m\n",
|
|
||||||
"Installing pip dependencies\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[2mResolved \u001b[1m123 packages\u001b[0m \u001b[2min 1.13s\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6) \n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-----\u001b[0m\u001b[0m 0 B/9.53 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/44.00 KiB \u001b[2A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[2A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m 0 B/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[3A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[3A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m 0 B/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[4A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[4A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/85.81 KiB \u001b[5A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB \u001b[5A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m---------------------------\u001b[2m---\u001b[0m\u001b[0m 30.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[5A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB \u001b[5A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 46.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 62.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 78.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 94.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m----------------------\u001b[2m--------\u001b[0m\u001b[0m 30.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[3A\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 44.00 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[3A\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.80 MiB/3.08 MiB \u001b[2A\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m-----------------\u001b[2m-------------\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.81 MiB/3.08 MiB \u001b[2A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 80.00 KiB/85.81 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[2mPrepared \u001b[1m6 packages\u001b[0m \u001b[2min 365ms\u001b[0m\u001b[0m \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[2mInstalled \u001b[1m6 packages\u001b[0m \u001b[2min 50ms\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1meval-type-backport\u001b[0m\u001b[2m==0.2.2\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mfaiss-cpu\u001b[0m\u001b[2m==1.10.0\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mshellingham\u001b[0m\u001b[2m==1.5.4\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtabulate\u001b[0m\u001b[2m==0.9.0\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtogether\u001b[0m\u001b[2m==1.5.5\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtyper\u001b[0m\u001b[2m==0.15.2\u001b[0m\n",
|
|
||||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 32ms\u001b[0m\u001b[0m\n",
|
|
||||||
"sentence-transformers --no-deps\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[32mBuild Successful!\u001b[0m\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"!uv pip install requests\n",
|
|
||||||
"\n",
|
|
||||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# this command installs all the dependencies needed for the llama stack server\n",
|
|
||||||
"!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n",
|
|
||||||
"\n",
|
|
||||||
"def run_llama_stack_server_background():\n",
|
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
|
||||||
" process = subprocess.Popen(\n",
|
|
||||||
" f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n",
|
|
||||||
" shell=True,\n",
|
|
||||||
" stdout=log_file,\n",
|
|
||||||
" stderr=log_file,\n",
|
|
||||||
" text=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
|
||||||
" return process\n",
|
|
||||||
"\n",
|
|
||||||
"def wait_for_server_to_start():\n",
|
|
||||||
" import requests\n",
|
|
||||||
" from requests.exceptions import ConnectionError\n",
|
|
||||||
" import time\n",
|
|
||||||
"\n",
|
|
||||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
|
||||||
" max_retries = 30\n",
|
|
||||||
" retry_interval = 1\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
|
||||||
" for _ in range(max_retries):\n",
|
|
||||||
" try:\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" print(\"\\nServer is ready!\")\n",
|
|
||||||
" return True\n",
|
|
||||||
" except ConnectionError:\n",
|
|
||||||
" print(\".\", end=\"\", flush=True)\n",
|
|
||||||
" time.sleep(retry_interval)\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
|
||||||
" return False\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# use this helper if needed to kill the server\n",
|
|
||||||
"def kill_llama_stack_server():\n",
|
|
||||||
" # Kill any existing llama stack server processes\n",
|
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|
|
@ -77,196 +77,8 @@
|
||||||
"id": "J2kGed0R5PSf",
|
"id": "J2kGed0R5PSf",
|
||||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
"source": "import os\nimport subprocess\nimport time\n\n!pip install uv\n!uv pip install requests\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server\n!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n\ndef run_llama_stack_server_background():\n log_file = open(\"llama_stack_server.log\", \"w\")\n process = subprocess.Popen(\n \"uv run --with llama-stack llama stack run llama_api\",\n shell=True,\n stdout=log_file,\n stderr=log_file,\n text=True\n )\n\n print(f\"Starting Llama Stack server with PID: {process.pid}\")\n return process\n\ndef wait_for_server_to_start():\n import requests\n from requests.exceptions import ConnectionError\n import time\n\n url = \"http://0.0.0.0:8321/v1/health\"\n max_retries = 30\n retry_interval = 1\n\n print(\"Waiting for server to start\", end=\"\")\n for _ in range(max_retries):\n try:\n response = requests.get(url)\n if response.status_code == 200:\n print(\"\\nServer is ready!\")\n return True\n except ConnectionError:\n print(\".\", end=\"\", flush=True)\n time.sleep(retry_interval)\n\n print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n # Kill any existing llama stack server processes\n os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/l4/lib/python3.10/site-packages (0.6.12)\n",
|
|
||||||
"\u001b[2mUsing Python 3.10.16 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/l4\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 83ms\u001b[0m\u001b[0m\n",
|
|
||||||
"Environment '/Users/erichuang/projects/internal-llama-stack/.venv' already exists, re-using it.\n",
|
|
||||||
"Virtual environment /Users/erichuang/projects/internal-llama-stack/.venv is already active\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 387ms\u001b[0m\u001b[0m\n",
|
|
||||||
"Installing pip dependencies\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[2mResolved \u001b[1m123 packages\u001b[0m \u001b[2min 1.13s\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
"\u001b[2K\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6) \n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-----\u001b[0m\u001b[0m 0 B/9.53 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/44.00 KiB \u001b[2A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[2A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m 0 B/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[3A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[3A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m 0 B/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[4A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB \u001b[4A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/85.81 KiB \u001b[5A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB \u001b[5A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 0 B/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m---------------------------\u001b[2m---\u001b[0m\u001b[0m 30.83 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[6A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB \u001b[5A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtabulate \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB \u001b[5A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 46.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 62.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 78.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 94.91 KiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[4A\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m----------------------\u001b[2m--------\u001b[0m\u001b[0m 30.88 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[3A\n",
|
|
||||||
"\u001b[2mtyper \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 44.00 KiB/44.00 KiB\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB \u001b[3A\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.80 MiB/3.08 MiB \u001b[2A\n",
|
|
||||||
"\u001b[2mtogether \u001b[0m \u001b[32m-----------------\u001b[2m-------------\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB\n",
|
|
||||||
"\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.81 MiB/3.08 MiB \u001b[2A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 80.00 KiB/85.81 KiB \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[2mPrepared \u001b[1m6 packages\u001b[0m \u001b[2min 365ms\u001b[0m\u001b[0m \u001b[1A\n",
|
|
||||||
"\u001b[2K\u001b[2mInstalled \u001b[1m6 packages\u001b[0m \u001b[2min 50ms\u001b[0m\u001b[0m \u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1meval-type-backport\u001b[0m\u001b[2m==0.2.2\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mfaiss-cpu\u001b[0m\u001b[2m==1.10.0\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mshellingham\u001b[0m\u001b[2m==1.5.4\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtabulate\u001b[0m\u001b[2m==0.9.0\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtogether\u001b[0m\u001b[2m==1.5.5\u001b[0m\n",
|
|
||||||
" \u001b[32m+\u001b[39m \u001b[1mtyper\u001b[0m\u001b[2m==0.15.2\u001b[0m\n",
|
|
||||||
"torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 32ms\u001b[0m\u001b[0m\n",
|
|
||||||
"sentence-transformers --no-deps\n",
|
|
||||||
"\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
|
|
||||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[32mBuild Successful!\u001b[0m\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"!pip install uv\n",
|
|
||||||
"!uv pip install requests\n",
|
|
||||||
"\n",
|
|
||||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# this command installs all the dependencies needed for the llama stack server\n",
|
|
||||||
"!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n",
|
|
||||||
"\n",
|
|
||||||
"def run_llama_stack_server_background():\n",
|
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
|
||||||
" process = subprocess.Popen(\n",
|
|
||||||
" \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
|
|
||||||
" shell=True,\n",
|
|
||||||
" stdout=log_file,\n",
|
|
||||||
" stderr=log_file,\n",
|
|
||||||
" text=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
|
||||||
" return process\n",
|
|
||||||
"\n",
|
|
||||||
"def wait_for_server_to_start():\n",
|
|
||||||
" import requests\n",
|
|
||||||
" from requests.exceptions import ConnectionError\n",
|
|
||||||
" import time\n",
|
|
||||||
"\n",
|
|
||||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
|
||||||
" max_retries = 30\n",
|
|
||||||
" retry_interval = 1\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
|
||||||
" for _ in range(max_retries):\n",
|
|
||||||
" try:\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" print(\"\\nServer is ready!\")\n",
|
|
||||||
" return True\n",
|
|
||||||
" except ConnectionError:\n",
|
|
||||||
" print(\".\", end=\"\", flush=True)\n",
|
|
||||||
" time.sleep(retry_interval)\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
|
||||||
" return False\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# use this helper if needed to kill the server\n",
|
|
||||||
"def kill_llama_stack_server():\n",
|
|
||||||
" # Kill any existing llama stack server processes\n",
|
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|
|
@ -137,58 +137,7 @@
|
||||||
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
"outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": "import os\nimport subprocess\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n!uv run --with llama-stack llama stack build --distro starter --image-type venv\n\ndef run_llama_stack_server_background():\n log_file = open(\"llama_stack_server.log\", \"w\")\n process = subprocess.Popen(\n f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\",\n shell=True,\n stdout=log_file,\n stderr=log_file,\n text=True\n )\n\n print(f\"Starting Llama Stack server with PID: {process.pid}\")\n return process\n\ndef wait_for_server_to_start():\n import requests\n from requests.exceptions import ConnectionError\n import time\n\n url = \"http://0.0.0.0:8321/v1/health\"\n max_retries = 30\n retry_interval = 1\n\n print(\"Waiting for server to start\", end=\"\")\n for _ in range(max_retries):\n try:\n response = requests.get(url)\n if response.status_code == 200:\n print(\"\\nServer is ready!\")\n return True\n except ConnectionError:\n print(\".\", end=\"\", flush=True)\n time.sleep(retry_interval)\n\n print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n # Kill any existing llama stack server processes\n os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"\n",
|
|
||||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
|
||||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
|
||||||
"\n",
|
|
||||||
"# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
|
|
||||||
"!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
|
|
||||||
"\n",
|
|
||||||
"def run_llama_stack_server_background():\n",
|
|
||||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
|
||||||
" process = subprocess.Popen(\n",
|
|
||||||
" f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n",
|
|
||||||
" shell=True,\n",
|
|
||||||
" stdout=log_file,\n",
|
|
||||||
" stderr=log_file,\n",
|
|
||||||
" text=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
|
|
||||||
" return process\n",
|
|
||||||
"\n",
|
|
||||||
"def wait_for_server_to_start():\n",
|
|
||||||
" import requests\n",
|
|
||||||
" from requests.exceptions import ConnectionError\n",
|
|
||||||
" import time\n",
|
|
||||||
"\n",
|
|
||||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
|
||||||
" max_retries = 30\n",
|
|
||||||
" retry_interval = 1\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
|
||||||
" for _ in range(max_retries):\n",
|
|
||||||
" try:\n",
|
|
||||||
" response = requests.get(url)\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" print(\"\\nServer is ready!\")\n",
|
|
||||||
" return True\n",
|
|
||||||
" except ConnectionError:\n",
|
|
||||||
" print(\".\", end=\"\", flush=True)\n",
|
|
||||||
" time.sleep(retry_interval)\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
|
||||||
" return False\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# use this helper if needed to kill the server\n",
|
|
||||||
"def kill_llama_stack_server():\n",
|
|
||||||
" # Kill any existing llama stack server processes\n",
|
|
||||||
" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
|
|
@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
||||||
...
|
...
|
||||||
Build Successful!
|
Build Successful!
|
||||||
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
|
You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml
|
||||||
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv
|
You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter
|
||||||
```
|
```
|
||||||
|
|
||||||
3. **Set the ENV variables by exporting them to the terminal**:
|
3. **Set the ENV variables by exporting them to the terminal**:
|
||||||
|
@ -106,7 +106,6 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
|
||||||
SAFETY_MODEL=$SAFETY_MODEL \
|
SAFETY_MODEL=$SAFETY_MODEL \
|
||||||
OLLAMA_URL=$OLLAMA_URL \
|
OLLAMA_URL=$OLLAMA_URL \
|
||||||
uv run --with llama-stack llama stack run starter \
|
uv run --with llama-stack llama stack run starter \
|
||||||
--image-type venv \
|
|
||||||
--port $LLAMA_STACK_PORT
|
--port $LLAMA_STACK_PORT
|
||||||
```
|
```
|
||||||
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
|
||||||
|
|
|
@ -444,9 +444,19 @@ def _run_stack_build_command_from_build_config(
|
||||||
|
|
||||||
cprint("Build Successful!", color="green", file=sys.stderr)
|
cprint("Build Successful!", color="green", file=sys.stderr)
|
||||||
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
|
cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
|
||||||
|
if build_config.image_type == LlamaStackImageType.VENV:
|
||||||
cprint(
|
cprint(
|
||||||
"You can run the new Llama Stack distro via: "
|
"You can run the new Llama Stack distro (after activating "
|
||||||
+ colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
|
+ colored(image_name, "cyan")
|
||||||
|
+ ") via: "
|
||||||
|
+ colored(f"llama stack run {run_config_file}", "blue"),
|
||||||
|
color="green",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
elif build_config.image_type == LlamaStackImageType.CONTAINER:
|
||||||
|
cprint(
|
||||||
|
"You can run the container with: "
|
||||||
|
+ colored(f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"),
|
||||||
color="green",
|
color="green",
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
|
|
|
@ -55,12 +55,12 @@ class StackRun(Subcommand):
|
||||||
"--image-name",
|
"--image-name",
|
||||||
type=str,
|
type=str,
|
||||||
default=None,
|
default=None,
|
||||||
help="Name of the image to run. Defaults to the current environment",
|
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||||
)
|
)
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
"--image-type",
|
"--image-type",
|
||||||
type=str,
|
type=str,
|
||||||
help="Image Type used during the build. This can be only venv.",
|
help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.",
|
||||||
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
|
choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value],
|
||||||
)
|
)
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
|
@ -106,11 +106,18 @@ class StackRun(Subcommand):
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
from llama_stack.core.configure import parse_and_maybe_upgrade_config
|
||||||
from llama_stack.core.utils.exec import formulate_run_args, run_command
|
|
||||||
|
if args.image_type or args.image_name:
|
||||||
|
self.parser.error(
|
||||||
|
"The --image-type and --image-name flags are no longer supported.\n\n"
|
||||||
|
"Please activate your virtual environment manually before running `llama stack run`.\n\n"
|
||||||
|
"For example:\n"
|
||||||
|
" source /path/to/venv/bin/activate\n"
|
||||||
|
" llama stack run <config>\n"
|
||||||
|
)
|
||||||
|
|
||||||
if args.enable_ui:
|
if args.enable_ui:
|
||||||
self._start_ui_development_server(args.port)
|
self._start_ui_development_server(args.port)
|
||||||
image_type, image_name = args.image_type, args.image_name
|
|
||||||
|
|
||||||
if args.config:
|
if args.config:
|
||||||
try:
|
try:
|
||||||
|
@ -122,10 +129,6 @@ class StackRun(Subcommand):
|
||||||
else:
|
else:
|
||||||
config_file = None
|
config_file = None
|
||||||
|
|
||||||
# Check if config is required based on image type
|
|
||||||
if image_type == ImageType.VENV.value and not config_file:
|
|
||||||
self.parser.error("Config file is required for venv environment")
|
|
||||||
|
|
||||||
if config_file:
|
if config_file:
|
||||||
logger.info(f"Using run configuration: {config_file}")
|
logger.info(f"Using run configuration: {config_file}")
|
||||||
|
|
||||||
|
@ -140,23 +143,8 @@ class StackRun(Subcommand):
|
||||||
os.makedirs(str(config.external_providers_dir), exist_ok=True)
|
os.makedirs(str(config.external_providers_dir), exist_ok=True)
|
||||||
except AttributeError as e:
|
except AttributeError as e:
|
||||||
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
|
||||||
else:
|
|
||||||
config = None
|
|
||||||
|
|
||||||
# If neither image type nor image name is provided, assume the server should be run directly
|
|
||||||
# using the current environment packages.
|
|
||||||
if not image_type and not image_name:
|
|
||||||
logger.info("No image type or image name provided. Assuming environment packages.")
|
|
||||||
self._uvicorn_run(config_file, args)
|
self._uvicorn_run(config_file, args)
|
||||||
else:
|
|
||||||
run_args = formulate_run_args(image_type, image_name)
|
|
||||||
|
|
||||||
run_args.extend([str(args.port)])
|
|
||||||
|
|
||||||
if config_file:
|
|
||||||
run_args.extend(["--config", str(config_file)])
|
|
||||||
|
|
||||||
run_command(run_args)
|
|
||||||
|
|
||||||
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
|
||||||
if not config_file:
|
if not config_file:
|
||||||
|
|
|
@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
|
||||||
echo "Llama Stack Server is already running, skipping start"
|
echo "Llama Stack Server is already running, skipping start"
|
||||||
else
|
else
|
||||||
echo "=== Starting Llama Stack Server ==="
|
echo "=== Starting Llama Stack Server ==="
|
||||||
nohup llama stack run ci-tests --image-type venv > server.log 2>&1 &
|
nohup llama stack run ci-tests > server.log 2>&1 &
|
||||||
|
|
||||||
echo "Waiting for Llama Stack Server to start..."
|
echo "Waiting for Llama Stack Server to start..."
|
||||||
for i in {1..30}; do
|
for i in {1..30}; do
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue