From 62fcb0c3d840c6f8551ff1e5c4b4d2cc409b4c63 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Tue, 7 Oct 2025 09:50:38 -0700 Subject: [PATCH] chore!: remove --image-type and --image-name from llama stack run # What does this PR do? Removes support for the `--image-type` and `--image-name` flags from `llama stack run`. The server now always runs with the packages of the currently active environment; passing either flag fails fast with guidance to activate the virtual environment first. Docs, notebooks, CI workflows, and the build-success hint printed by `llama stack build` are updated to drop the flags. ## Test Plan The updated CI workflows (integration-auth-tests, test-external, test-external-provider-module) and `scripts/integration-tests.sh` start the server with a plain `llama stack run` invocation and wait for it to become ready. --- .github/workflows/integration-auth-tests.yml | 2 +- .../test-external-provider-module.yml | 2 +- .github/workflows/test-external.yml | 2 +- docs/docs/advanced_apis/post_training.mdx | 2 +- docs/docs/distributions/building_distro.mdx | 11 +++--- docs/getting_started.ipynb | 4 +-- docs/getting_started_llama4.ipynb | 4 +-- docs/getting_started_llama_api.ipynb | 4 +-- docs/quick_start.ipynb | 4 +-- docs/zero_to_hero_guide/README.md | 3 +- llama_stack/cli/stack/_build.py | 24 +++++++++---- llama_stack/cli/stack/run.py | 36 +++++++------------ scripts/integration-tests.sh | 2 +- 13 files changed, 48 insertions(+), 52 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 238fed683..f9c42ef8a 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -86,7 +86,7 @@ jobs: # avoid line breaks in the server log, especially because we grep it below. export COLUMNS=1984 - nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 & + nohup uv run llama stack run $run_dir/run.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml index 8a757b068..b43cefb27 100644 --- a/.github/workflows/test-external-provider-module.yml +++ b/.github/workflows/test-external-provider-module.yml @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ramalama-stack-test/bin/activate uv pip list - nohup llama stack run tests/external/ramalama-stack/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & + nohup llama stack run tests/external/ramalama-stack/run.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml index 7ee467451..a008b17af 100644 --- a/.github/workflows/test-external.yml +++ b/.github/workflows/test-external.yml @@ -59,7 +59,7 @@ jobs: # Use the virtual environment created by the build step (name comes from build config) source ci-test/bin/activate uv pip list - nohup llama stack run tests/external/run-byoa.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & + nohup llama stack run tests/external/run-byoa.yaml > server.log 2>&1 & - name: Wait for Llama Stack server to be ready run: | diff --git a/docs/docs/advanced_apis/post_training.mdx b/docs/docs/advanced_apis/post_training.mdx index 516ac07e1..43bfaea91 100644 --- a/docs/docs/advanced_apis/post_training.mdx +++ b/docs/docs/advanced_apis/post_training.mdx @@ -52,7 +52,7 @@ You can access the HuggingFace trainer via the `starter` distribution: ```bash llama stack build --distro starter --image-type venv -llama stack run --image-type venv ~/.llama/distributions/starter/starter-run.yaml +llama stack run ~/.llama/distributions/starter/starter-run.yaml ``` ### Usage Example diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx index 5ffb623b5..a4f7e1f60 100644 --- a/docs/docs/distributions/building_distro.mdx +++ b/docs/docs/distributions/building_distro.mdx @@ -322,20 +322,20 @@ Now, 
let's start the Llama Stack Distribution Server. You will need the YAML con llama stack run -h usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--image-type {venv}] [--enable-ui] - [config | template] + [config | distro] Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. positional arguments: - config | template Path to config file to use for the run or name of known template (`llama stack list` for a list). (default: None) + config | distro Path to config file to use for the run or name of known distro (`llama stack list` for a list). (default: None) options: -h, --help show this help message and exit --port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321) --image-name IMAGE_NAME - Name of the image to run. Defaults to the current environment (default: None) + [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None) --image-type {venv} - Image Type used during the build. This should be venv. (default: None) + [DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running. (default: None) --enable-ui Start the UI server (default: False) ``` @@ -347,9 +347,6 @@ llama stack run tgi # Start using config file llama stack run ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml - -# Start using a venv -llama stack run --image-type venv ~/.llama/distributions/llamastack-my-local-stack/my-local-stack-run.yaml ``` ``` diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index d7d544ad5..3dcedfed6 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -123,12 +123,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server with the together inference provider\n", - "!uv run --with llama-stack llama stack build --distro together --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro together\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " \"uv run --with llama-stack llama stack run together --image-type venv\",\n", + " \"uv run --with llama-stack llama stack run together\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index b840117f1..bca505b5e 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -233,12 +233,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server\n", - "!uv run --with llama-stack llama stack build --distro meta-reference-gpu --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro meta-reference-gpu\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n", + " f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb index f65566205..7680c4a0c 
100644 --- a/docs/getting_started_llama_api.ipynb +++ b/docs/getting_started_llama_api.ipynb @@ -223,12 +223,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server\n", - "!uv run --with llama-stack llama stack build --distro llama_api --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro llama_api\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " \"uv run --with llama-stack llama stack run llama_api --image-type venv\",\n", + " \"uv run --with llama-stack llama stack run llama_api\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb index c194a901d..eebfd6686 100644 --- a/docs/quick_start.ipynb +++ b/docs/quick_start.ipynb @@ -145,12 +145,12 @@ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n", "\n", "# this command installs all the dependencies needed for the llama stack server with the ollama inference provider\n", - "!uv run --with llama-stack llama stack build --distro starter --image-type venv\n", + "!uv run --with llama-stack llama stack build --distro starter\n", "\n", "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter --image-type venv\",\n", + " f\"OLLAMA_URL=http://localhost:11434 uv run --with llama-stack llama stack run starter\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index a899d3ebe..1b643d692 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -88,7 +88,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next ... Build Successful! You can find the newly-built template here: ~/.llama/distributions/starter/starter-run.yaml - You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter --image-type venv + You can run the new Llama Stack Distro via: uv run --with llama-stack llama stack run starter ``` 3. **Set the ENV variables by exporting them to the terminal**: @@ -106,7 +106,6 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next SAFETY_MODEL=$SAFETY_MODEL \ OLLAMA_URL=$OLLAMA_URL \ uv run --with llama-stack llama stack run starter \ - --image-type venv \ --port $LLAMA_STACK_PORT ``` Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model. 
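For reference, the docs and notebook hunks above all converge on the same pattern: build the distro, then launch the server with a bare `llama stack run` from an environment that already provides the dependencies. Below is a minimal Python sketch of that background-launcher pattern as the notebooks use it; the `starter` distro name and the log file path are illustrative assumptions, not part of this patch.

```python
import subprocess


def run_llama_stack_server_background(distro: str = "starter", log_path: str = "llama_stack_server.log"):
    """Start `llama stack run` in the background, as the updated notebooks do.

    Assumes the active environment (here via `uv run --with llama-stack`) already
    provides the server dependencies; no --image-type/--image-name flags are passed.
    """
    log_file = open(log_path, "w")
    return subprocess.Popen(
        f"uv run --with llama-stack llama stack run {distro}",
        shell=True,
        stdout=log_file,
        stderr=log_file,
    )


if __name__ == "__main__":
    proc = run_llama_stack_server_background()
    print(f"Llama Stack server starting (PID {proc.pid}); logs in llama_stack_server.log")
```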
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index b14e6fe55..471d5cb66 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -444,12 +444,24 @@ def _run_stack_build_command_from_build_config( cprint("Build Successful!", color="green", file=sys.stderr) cprint(f"You can find the newly-built distribution here: {run_config_file}", color="blue", file=sys.stderr) - cprint( - "You can run the new Llama Stack distro via: " - + colored(f"llama stack run {run_config_file} --image-type {build_config.image_type}", "blue"), - color="green", - file=sys.stderr, - ) + if build_config.image_type == LlamaStackImageType.VENV: + cprint( + "You can run the new Llama Stack distro (after activating " + + colored(image_name, "cyan") + + ") via: " + + colored(f"llama stack run {run_config_file}", "blue"), + color="green", + file=sys.stderr, + ) + elif build_config.image_type == LlamaStackImageType.CONTAINER: + cprint( + "You can run the container with: " + + colored( + f"docker run -p 8321:8321 -v ~/.llama:/root/.llama localhost/{image_name} --port 8321", "blue" + ), + color="green", + file=sys.stderr, + ) return distro_path else: return _generate_run_config(build_config, build_dir, image_name) diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 4490f38f0..06dae7318 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -55,12 +55,12 @@ class StackRun(Subcommand): "--image-name", type=str, default=None, - help="Name of the image to run. Defaults to the current environment", + help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", ) self.parser.add_argument( "--image-type", type=str, - help="Image Type used during the build. This can be only venv.", + help="[DEPRECATED] This flag is no longer supported. Please activate your virtual environment before running.", choices=[e.value for e in ImageType if e.value != ImageType.CONTAINER.value], ) self.parser.add_argument( @@ -73,11 +73,18 @@ class StackRun(Subcommand): import yaml from llama_stack.core.configure import parse_and_maybe_upgrade_config - from llama_stack.core.utils.exec import formulate_run_args, run_command + + if args.image_type or args.image_name: + self.parser.error( + "The --image-type and --image-name flags are no longer supported.\n\n" + "Please activate your virtual environment manually before running `llama stack run`.\n\n" + "For example:\n" + " source /path/to/venv/bin/activate\n" + " llama stack run <config>\n" + ) if args.enable_ui: self._start_ui_development_server(args.port) - image_type, image_name = args.image_type, args.image_name if args.config: try: @@ -89,10 +96,6 @@ class StackRun(Subcommand): else: config_file = None - # Check if config is required based on image type - if image_type == ImageType.VENV.value and not config_file: - self.parser.error("Config file is required for venv environment") - if config_file: logger.info(f"Using run configuration: {config_file}") @@ -107,23 +110,8 @@ class StackRun(Subcommand): os.makedirs(str(config.external_providers_dir), exist_ok=True) except AttributeError as e: self.parser.error(f"failed to parse config file '{config_file}':\n {e}") - else: - config = None - # If neither image type nor image name is provided, assume the server should be run directly - # using the current environment packages. - if not image_type and not image_name: - logger.info("No image type or image name provided. 
Assuming environment packages.") - self._uvicorn_run(config_file, args) - else: - run_args = formulate_run_args(image_type, image_name) - - run_args.extend([str(args.port)]) - - if config_file: - run_args.extend(["--config", str(config_file)]) - - run_command(run_args) + self._uvicorn_run(config_file, args) def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None: if not config_file: diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index eee60951d..af7f5cb74 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -186,7 +186,7 @@ if [[ "$STACK_CONFIG" == *"server:"* ]]; then echo "Llama Stack Server is already running, skipping start" else echo "=== Starting Llama Stack Server ===" - nohup llama stack run ci-tests --image-type venv > server.log 2>&1 & + nohup llama stack run ci-tests > server.log 2>&1 & echo "Waiting for Llama Stack Server to start..." for i in {1..30}; do
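As a possible extension of the test plan, a small script could check that the removed flags now fail fast with the new guidance instead of being silently accepted. This is only a sketch: it assumes the `llama` CLI is on PATH and uses the `ci-tests` distro name referenced in `scripts/integration-tests.sh`; any config name would do, since the deprecation check fires before the config is resolved.

```python
import subprocess


def check_deprecated_flags_rejected() -> None:
    """Hypothetical verification: passing --image-type should exit non-zero
    and print the 'no longer supported' guidance from StackRun."""
    result = subprocess.run(
        ["llama", "stack", "run", "ci-tests", "--image-type", "venv"],
        capture_output=True,
        text=True,
    )
    # argparse's parser.error() exits with status 2 and writes the message to stderr
    assert result.returncode != 0, "expected a non-zero exit code"
    assert "no longer supported" in result.stderr, result.stderr


if __name__ == "__main__":
    check_deprecated_flags_rejected()
    print("ok: --image-type is rejected with the deprecation guidance")
```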