Merge ea30c24595 into sapling-pr-archive-ehhuang

2025-12-05 02:17:31 +00:00 · 2025-10-06 16:11:47 -07:00 · 2025-10-06 16:11:47 -07:00 · 2c17a1b22e
commit 2c17a1b22e
parent ee80bca7b5 ea30c24595
9 changed files with 3953 additions and 4262 deletions
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -86,7 +86,7 @@ jobs:

          # avoid line breaks in the server log, especially because we grep it below.
          export COLUMNS=1984
-          nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
+          nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
--- a/.github/workflows/test-external-provider-module.yml
+++ b/.github/workflows/test-external-provider-module.yml
@@ -59,7 +59,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ramalama-stack-test/bin/activate
          uv pip list
-          nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -59,7 +59,7 @@ jobs:
          # Use the virtual environment created by the build step (name comes from build config)
          source ci-test/bin/activate
          uv pip list
-          nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &
+          nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 &

      - name: Wait for Llama Stack server to be ready
        run: |
--- a/docs/docs/advanced_apis/post_training.mdx
+++ b/docs/docs/advanced_apis/post_training.mdx
@@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution:

 ```bash
 llama stack build --distro starter --image-type venv
-llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml
+llama stack run ~/.llama/distributions/starter/starter-run.yaml
 ```

 ### Usage Example
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -85,88 +85,8 @@
    "id": "J2kGed0R5PSf",
    "outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
   },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/stack/lib/python3.10/site-packages (0.5.29)\n",
-            "Environment '/Users/hjshah/git/llama-stack/.venv' already exists, re-using it.\n",
-            "Virtual environment /Users/hjshah/git/llama-stack/.venv is already active\n",
-            "\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 314ms\u001b[0m\u001b[0m\n",
-            "Installing pip dependencies\n",
-            "\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
-            "\u001b[2K\u001b[2mResolved \u001b[1m125 packages\u001b[0m \u001b[2min 646ms\u001b[0m\u001b[0m                                       \u001b[0m\n",
-            "\u001b[2mUninstalled \u001b[1m1 package\u001b[0m \u001b[2min 404ms\u001b[0m\u001b[0m\n",
-            "\u001b[2K\u001b[2mInstalled \u001b[1m1 package\u001b[0m \u001b[2min 129ms\u001b[0m\u001b[0m                                \u001b[0m\n",
-            " \u001b[31m-\u001b[39m \u001b[1mnumpy\u001b[0m\u001b[2m==2.2.3\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mnumpy\u001b[0m\u001b[2m==1.26.4\u001b[0m\n",
-            "sentence-transformers --no-deps\n",
-            "\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 54ms\u001b[0m\u001b[0m\n",
-            "torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
-            "\u001b[2mUsing Python 3.10.16 environment at: /Users/hjshah/git/llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 10ms\u001b[0m\u001b[0m\n",
-            "\u001b[32mBuild Successful!\u001b[0m\n"
-          ]
-        }
-      ],
-      "source": [
-        "import os\n",
-        "import subprocess\n",
-        "import time\n",
-        "\n",
-        "!pip install uv\n",
-        "\n",
-        "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
-        "  del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
-        "\n",
-        "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n",
-        "!uv run --with llama-stack llama stack build --distro together --image-type venv\n",
-        "\n",
-        "def run_llama_stack_server_background():\n",
-        "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
-        "    process = subprocess.Popen(\n",
-        "        \"uv run --with llama-stack llama stack run together --image-type venv\",\n",
-        "        shell=True,\n",
-        "        stdout=log_file,\n",
-        "        stderr=log_file,\n",
-        "        text=True\n",
-        "    )\n",
-        "\n",
-        "    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
-        "    return process\n",
-        "\n",
-        "def wait_for_server_to_start():\n",
-        "    import requests\n",
-        "    from requests.exceptions import ConnectionError\n",
-        "    import time\n",
-        "\n",
-        "    url = \"http://0.0.0.0:8321/v1/health\"\n",
-        "    max_retries = 30\n",
-        "    retry_interval = 1\n",
-        "\n",
-        "    print(\"Waiting for server to start\", end=\"\")\n",
-        "    for _ in range(max_retries):\n",
-        "        try:\n",
-        "            response = requests.get(url)\n",
-        "            if response.status_code == 200:\n",
-        "                print(\"\\nServer is ready!\")\n",
-        "                return True\n",
-        "        except ConnectionError:\n",
-        "            print(\".\", end=\"\", flush=True)\n",
-        "            time.sleep(retry_interval)\n",
-        "\n",
-        "    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
-        "    return False\n",
-        "\n",
-        "\n",
-        "# use this helper if needed to kill the server\n",
-        "def kill_llama_stack_server():\n",
-        "    # Kill any existing llama stack server processes\n",
-        "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
-      ]
+   "outputs": [],
+   "source": "import os\nimport subprocess\nimport time\n\n!pip install uv\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n  del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server with the together inference provider\n!uv run --with llama-stack llama stack build --distro together --image-type venv\n\ndef run_llama_stack_server_background():\n    log_file = open(\"llama_stack_server.log\", \"w\")\n    process = subprocess.Popen(\n        \"uv run --with llama-stack llama stack run together\",\n        shell=True,\n        stdout=log_file,\n        stderr=log_file,\n        text=True\n    )\n\n    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n    return process\n\ndef wait_for_server_to_start():\n    import requests\n    from requests.exceptions import ConnectionError\n    import time\n\n    url = \"http://0.0.0.0:8321/v1/health\"\n    max_retries = 30\n    retry_interval = 1\n\n    print(\"Waiting for server to start\", end=\"\")\n    for _ in range(max_retries):\n        try:\n            response = requests.get(url)\n            if response.status_code == 200:\n                print(\"\\nServer is ready!\")\n                return True\n        except ConnectionError:\n            print(\".\", end=\"\", flush=True)\n            time.sleep(retry_interval)\n\n    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n    return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n    # Kill any existing llama stack server processes\n    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
  },
  {
   "cell_type": "markdown",
--- a/docs/getting_started_llama_api.ipynb
+++ b/docs/getting_started_llama_api.ipynb
@@ -77,196 +77,8 @@
    "id": "J2kGed0R5PSf",
    "outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
   },
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Requirement already satisfied: uv in /opt/homebrew/Caskroom/miniconda/base/envs/l4/lib/python3.10/site-packages (0.6.12)\n",
-            "\u001b[2mUsing Python 3.10.16 environment at: /opt/homebrew/Caskroom/miniconda/base/envs/l4\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 83ms\u001b[0m\u001b[0m\n",
-            "Environment '/Users/erichuang/projects/internal-llama-stack/.venv' already exists, re-using it.\n",
-            "Virtual environment /Users/erichuang/projects/internal-llama-stack/.venv is already active\n",
-            "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 387ms\u001b[0m\u001b[0m\n",
-            "Installing pip dependencies\n",
-            "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
-            "\u001b[2K\u001b[2mResolved \u001b[1m123 packages\u001b[0m \u001b[2min 1.13s\u001b[0m\u001b[0m                                       \u001b[0m\n",
-            "\u001b[2K\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)                                                   \n",
-            "\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-----\u001b[0m\u001b[0m     0 B/9.53 KiB                     \u001b[1A\n",
-            "\u001b[2K\u001b[1A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)-\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB                    \u001b[1A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/44.00 KiB                     \u001b[2A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2K\u001b[2A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[2A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m     0 B/34.43 KiB\n",
-            "\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[3A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2K\u001b[3A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[3A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m\u001b[2m------------------------------\u001b[0m\u001b[0m     0 B/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[4A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB                   \u001b[4A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/85.81 KiB                     \u001b[5A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB                   \u001b[5A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m     0 B/3.08 MiB                      \u001b[6A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m-------------\u001b[2m-----------------\u001b[0m\u001b[0m 14.83 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m---------------------------\u001b[2m---\u001b[0m\u001b[0m 30.83 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
-            "\u001b[2meval-type-backport\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 5.69 KiB/5.69 KiB\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[6A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[6A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 14.91 KiB/3.08 MiB                    \u001b[5A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtabulate  \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 34.43 KiB/34.43 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[5A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB                    \u001b[5A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 30.91 KiB/3.08 MiB                    \u001b[4A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 46.91 KiB/3.08 MiB                    \u001b[4A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 62.91 KiB/3.08 MiB                    \u001b[4A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 78.91 KiB/3.08 MiB                    \u001b[4A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------\u001b[2m------------------------\u001b[0m\u001b[0m 16.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 94.91 KiB/3.08 MiB                    \u001b[4A\n",
-            "\u001b[2mshellingham\u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 9.53 KiB/9.53 KiB\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m-----------\u001b[2m-------------------\u001b[0m\u001b[0m 14.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[4A\u001b[37m⠙\u001b[0m \u001b[2mPreparing packages...\u001b[0m (0/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[4A\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m----------------------\u001b[2m--------\u001b[0m\u001b[0m 30.88 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[3A\n",
-            "\u001b[2mtyper     \u001b[0m \u001b[32m------------------------------\u001b[2m\u001b[0m\u001b[0m 44.00 KiB/44.00 KiB\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[3A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 2.62 MiB/3.08 MiB                     \u001b[3A\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m------------\u001b[2m------------------\u001b[0m\u001b[0m 32.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.80 MiB/3.08 MiB                     \u001b[2A\n",
-            "\u001b[2mtogether  \u001b[0m \u001b[32m-----------------\u001b[2m-------------\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB\n",
-            "\u001b[2K\u001b[2A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 2.81 MiB/3.08 MiB                     \u001b[2A\n",
-            "\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)----\u001b[0m\u001b[0m 48.00 KiB/85.81 KiB                   \u001b[1A\n",
-            "\u001b[2K\u001b[1A\u001b[37m⠹\u001b[0m \u001b[2mPreparing packages...\u001b[0m (3/6)2m--\u001b[0m\u001b[0m 80.00 KiB/85.81 KiB                   \u001b[1A\n",
-            "\u001b[2K\u001b[2mPrepared \u001b[1m6 packages\u001b[0m \u001b[2min 365ms\u001b[0m\u001b[0m                                                 \u001b[1A\n",
-            "\u001b[2K\u001b[2mInstalled \u001b[1m6 packages\u001b[0m \u001b[2min 50ms\u001b[0m\u001b[0m                                \u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1meval-type-backport\u001b[0m\u001b[2m==0.2.2\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mfaiss-cpu\u001b[0m\u001b[2m==1.10.0\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mshellingham\u001b[0m\u001b[2m==1.5.4\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mtabulate\u001b[0m\u001b[2m==0.9.0\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mtogether\u001b[0m\u001b[2m==1.5.5\u001b[0m\n",
-            " \u001b[32m+\u001b[39m \u001b[1mtyper\u001b[0m\u001b[2m==0.15.2\u001b[0m\n",
-            "torch torchvision --index-url https://download.pytorch.org/whl/cpu\n",
-            "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m2 packages\u001b[0m \u001b[2min 32ms\u001b[0m\u001b[0m\n",
-            "sentence-transformers --no-deps\n",
-            "\u001b[2mUsing Python 3.11.11 environment at: /Users/erichuang/projects/internal-llama-stack/.venv\u001b[0m\n",
-            "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 63ms\u001b[0m\u001b[0m\n",
-            "\u001b[32mBuild Successful!\u001b[0m\n"
-          ]
-        }
-      ],
-      "source": [
-        "import os\n",
-        "import subprocess\n",
-        "import time\n",
-        "\n",
-        "!pip install uv\n",
-        "!uv pip install requests\n",
-        "\n",
-        "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
-        "  del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
-        "\n",
-        "# this command installs all the dependencies needed for the llama stack server\n",
-        "!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n",
-        "\n",
-        "def run_llama_stack_server_background():\n",
-        "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
-        "    process = subprocess.Popen(\n",
-        "        \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n",
-        "        shell=True,\n",
-        "        stdout=log_file,\n",
-        "        stderr=log_file,\n",
-        "        text=True\n",
-        "    )\n",
-        "\n",
-        "    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
-        "    return process\n",
-        "\n",
-        "def wait_for_server_to_start():\n",
-        "    import requests\n",
-        "    from requests.exceptions import ConnectionError\n",
-        "    import time\n",
-        "\n",
-        "    url = \"http://0.0.0.0:8321/v1/health\"\n",
-        "    max_retries = 30\n",
-        "    retry_interval = 1\n",
-        "\n",
-        "    print(\"Waiting for server to start\", end=\"\")\n",
-        "    for _ in range(max_retries):\n",
-        "        try:\n",
-        "            response = requests.get(url)\n",
-        "            if response.status_code == 200:\n",
-        "                print(\"\\nServer is ready!\")\n",
-        "                return True\n",
-        "        except ConnectionError:\n",
-        "            print(\".\", end=\"\", flush=True)\n",
-        "            time.sleep(retry_interval)\n",
-        "\n",
-        "    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
-        "    return False\n",
-        "\n",
-        "\n",
-        "# use this helper if needed to kill the server\n",
-        "def kill_llama_stack_server():\n",
-        "    # Kill any existing llama stack server processes\n",
-        "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
-      ]
+   "outputs": [],
+   "source": "import os\nimport subprocess\nimport time\n\n!pip install uv\n!uv pip install requests\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n  del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server\n!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n\ndef run_llama_stack_server_background():\n    log_file = open(\"llama_stack_server.log\", \"w\")\n    process = subprocess.Popen(\n        \"uv run --with llama-stack llama stack run llama_api\",\n        shell=True,\n        stdout=log_file,\n        stderr=log_file,\n        text=True\n    )\n\n    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n    return process\n\ndef wait_for_server_to_start():\n    import requests\n    from requests.exceptions import ConnectionError\n    import time\n\n    url = \"http://0.0.0.0:8321/v1/health\"\n    max_retries = 30\n    retry_interval = 1\n\n    print(\"Waiting for server to start\", end=\"\")\n    for _ in range(max_retries):\n        try:\n            response = requests.get(url)\n            if response.status_code == 200:\n                print(\"\\nServer is ready!\")\n                return True\n        except ConnectionError:\n            print(\".\", end=\"\", flush=True)\n            time.sleep(retry_interval)\n\n    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n    return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n    # Kill any existing llama stack server processes\n    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
  },
  {
   "cell_type": "markdown",
--- a/docs/quick_start.ipynb
+++ b/docs/quick_start.ipynb
@ -137,58 +137,7 @@
    "outputId": "2478ea60-8d35-48a1-b011-f233831740c5"
   },
   "outputs": [],
-      "source": [
-        "import os\n",
-        "import subprocess\n",
-        "\n",
-        "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
-        "  del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
-        "\n",
-        "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n",
-        "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n",
-        "\n",
-        "def run_llama_stack_server_background():\n",
-        "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
-        "    process = subprocess.Popen(\n",
-        "        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\n",
-        "        shell=True,\n",
-        "        stdout=log_file,\n",
-        "        stderr=log_file,\n",
-        "        text=True\n",
-        "    )\n",
-        "\n",
-        "    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n",
-        "    return process\n",
-        "\n",
-        "def wait_for_server_to_start():\n",
-        "    import requests\n",
-        "    from requests.exceptions import ConnectionError\n",
-        "    import time\n",
-        "\n",
-        "    url = \"http://0.0.0.0:8321/v1/health\"\n",
-        "    max_retries = 30\n",
-        "    retry_interval = 1\n",
-        "\n",
-        "    print(\"Waiting for server to start\", end=\"\")\n",
-        "    for _ in range(max_retries):\n",
-        "        try:\n",
-        "            response = requests.get(url)\n",
-        "            if response.status_code == 200:\n",
-        "                print(\"\\nServer is ready!\")\n",
-        "                return True\n",
-        "        except ConnectionError:\n",
-        "            print(\".\", end=\"\", flush=True)\n",
-        "            time.sleep(retry_interval)\n",
-        "\n",
-        "    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
-        "    return False\n",
-        "\n",
-        "\n",
-        "# use this helper if needed to kill the server\n",
-        "def kill_llama_stack_server():\n",
-        "    # Kill any existing llama stack server processes\n",
-        "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
-      ]
+   "source": "import os\nimport subprocess\n\nif \"UV_SYSTEM_PYTHON\" in os.environ:\n  del os.environ[\"UV_SYSTEM_PYTHON\"]\n\n# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n!uv run --with llama-stack llama stack build --distro starter --image-type venv\n\ndef run_llama_stack_server_background():\n    log_file = open(\"llama_stack_server.log\", \"w\")\n    process = subprocess.Popen(\n        f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\",\n        shell=True,\n        stdout=log_file,\n        stderr=log_file,\n        text=True\n    )\n\n    print(f\"Starting Llama Stack server with PID: {process.pid}\")\n    return process\n\ndef wait_for_server_to_start():\n    import requests\n    from requests.exceptions import ConnectionError\n    import time\n\n    url = \"http://0.0.0.0:8321/v1/health\"\n    max_retries = 30\n    retry_interval = 1\n\n    print(\"Waiting for server to start\", end=\"\")\n    for _ in range(max_retries):\n        try:\n            response = requests.get(url)\n            if response.status_code == 200:\n                print(\"\\nServer is ready!\")\n                return True\n        except ConnectionError:\n            print(\".\", end=\"\", flush=True)\n            time.sleep(retry_interval)\n\n    print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n    return False\n\n\n# use this helper if needed to kill the server\ndef kill_llama_stack_server():\n    # Kill any existing llama stack server processes\n    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
  },
  {
   "cell_type": "markdown",
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@ -444,9 +444,19 @@ def _run_stack_build_command_from_build_config(

        cprint("Build Successful!", color="green", file=sys.stderr)
        cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr)
+        if build_config.image_type == LlamaStackImageType.VENV:
            cprint(
-            "You can run the new Llama Stack distro via: "
-            + colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"),
+                "You can run the new Llama Stack distro (after activating "
+                + colored(image_name, "cyan")
+                + ") via: "
+                + colored(f"llama stack run {run_config_file}", "blue"),
+                color="green",
+                file=sys.stderr,
+            )
+        elif build_config.image_type == LlamaStackImageType.CONTAINER:
+            cprint(
+                "You can run the container with: "
+                + colored(f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue"),
                color="green",
                file=sys.stderr,
            )
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then
        echo "Llama Stack Server is already running, skipping start"
    else
        echo "=== Starting Llama Stack Server ==="
-        nohup llama stack run ci-tests --image-type venv > server.log 2>&1 &
+        nohup llama stack run ci-tests > server.log 2>&1 &

        echo "Waiting for Llama Stack Server to start..."
        for i in {1..30}; do