diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py
index 09c753776..b96842119 100644
--- a/llama_stack/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -9,6 +9,7 @@ import asyncio
 import json
 import os
 import shutil
+import sys
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import partial
@@ -377,14 +378,15 @@ def _meta_download(
     downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads)
     asyncio.run(downloader.download_all(tasks))

-    cprint(f"\nSuccessfully downloaded model to {output_dir}", "green")
+    cprint(f"\nSuccessfully downloaded model to {output_dir}", color="green", file=sys.stderr)
     cprint(
         f"\nView MD5 checksum files at: {output_dir / 'checklist.chk'}",
-        "white",
+        file=sys.stderr,
     )
     cprint(
         f"\n[Optionally] To run MD5 checksums, use the following command: llama model verify-download --model-id {model_id}",
-        "yellow",
+        color="yellow",
+        file=sys.stderr,
     )


diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index 4c6f82d1b..8909486b3 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -79,6 +79,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             cprint(
                 f"Could not find template {args.template}. Please run `llama stack build --list-templates` to check out the available templates",
                 color="red",
+                file=sys.stderr,
             )
             sys.exit(1)
         build_config = available_templates[args.template]
@@ -88,6 +89,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             cprint(
                 f"Please specify a image-type ({' | '.join(e.value for e in ImageType)}) for {args.template}",
                 color="red",
+                file=sys.stderr,
             )
             sys.exit(1)
     elif args.providers:
@@ -97,6 +99,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                 cprint(
                     "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
                     color="red",
+                    file=sys.stderr,
                 )
                 sys.exit(1)
             api, provider = api_provider.split("=")
@@ -105,6 +108,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                 cprint(
                     f"{api} is not a valid API.",
                     color="red",
+                    file=sys.stderr,
                 )
                 sys.exit(1)
             if provider in providers_for_api:
@@ -113,6 +117,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                 cprint(
                     f"{provider} is not a valid provider for the {api} API.",
                     color="red",
+                    file=sys.stderr,
                 )
                 sys.exit(1)
         distribution_spec = DistributionSpec(
@@ -123,6 +128,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             cprint(
                 f"Please specify a image-type (container | conda | venv) for {args.template}",
                 color="red",
+                file=sys.stderr,
             )
             sys.exit(1)

@@ -151,12 +157,14 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             cprint(
                 f"No current conda environment detected or specified, will create a new conda environment with the name `llamastack-{name}`",
                 color="yellow",
+                file=sys.stderr,
             )
             image_name = f"llamastack-{name}"
         else:
             cprint(
                 f"Using conda environment {image_name}",
                 color="green",
+                file=sys.stderr,
             )
     else:
         image_name = f"llamastack-{name}"
@@ -169,9 +177,10 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
                 """,
             ),
             color="green",
+            file=sys.stderr,
         )

-        print("Tip: use <TAB> to see options for the providers.\n")
+        cprint("Tip: use <TAB> to see options for the providers.\n", color="green", file=sys.stderr)

         providers = dict()
         for api, providers_for_api in get_provider_registry().items():
@@ -213,6 +222,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
             cprint(
                 f"Could not parse config file {args.config}: {e}",
                 color="red",
+                file=sys.stderr,
             )
             sys.exit(1)

@@ -239,14 +249,17 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
         cprint(
             f"Error building stack: {exc}",
             color="red",
+            file=sys.stderr,
         )
-        cprint("Stack trace:", color="red")
+        cprint("Stack trace:", color="red", file=sys.stderr)
         traceback.print_exc()
         sys.exit(1)
+
     if run_config is None:
         cprint(
             "Run config path is empty",
             color="red",
+            file=sys.stderr,
         )
         sys.exit(1)

@@ -304,6 +317,7 @@ def _generate_run_config(
                 cprint(
                     f"Failed to import provider {provider_type} for API {api} - assuming it's external, skipping",
                     color="yellow",
+                    file=sys.stderr,
                 )
                 # Set config_type to None to avoid UnboundLocalError
                 config_type = None
@@ -331,10 +345,7 @@ def _generate_run_config(
     # For non-container builds, the run.yaml is generated at the very end of the build process so it
     # makes sense to display this message
     if build_config.image_type != LlamaStackImageType.CONTAINER.value:
-        cprint(
-            f"You can now run your stack with `llama stack run {run_config_file}`",
-            color="green",
-        )
+        cprint(f"You can now run your stack with `llama stack run {run_config_file}`", color="green", file=sys.stderr)

     return run_config_file

@@ -372,7 +383,7 @@ def _run_stack_build_command_from_build_config(
     # Generate the run.yaml so it can be included in the container image with the proper entrypoint
     # Only do this if we're building a container image and we're not using a template
     if build_config.image_type == LlamaStackImageType.CONTAINER.value and not template_name and config_path:
-        cprint("Generating run.yaml file", color="green")
+        cprint("Generating run.yaml file", color="yellow", file=sys.stderr)
         run_config_file = _generate_run_config(build_config, build_dir, image_name)

     with open(build_file_path, "w") as f:
@@ -396,11 +407,13 @@ def _run_stack_build_command_from_build_config(
         run_config_file = build_dir / f"{template_name}-run.yaml"
         shutil.copy(path, run_config_file)

-        cprint("Build Successful!", color="green")
-        cprint("You can find the newly-built template here: " + colored(template_path, "light_blue"))
+        cprint("Build Successful!", color="green", file=sys.stderr)
+        cprint(f"You can find the newly-built template here: {template_path}", color="light_blue", file=sys.stderr)
         cprint(
             "You can run the new Llama Stack distro via: "
-            + colored(f"llama stack run {template_path} --image-type {build_config.image_type}", "light_blue")
+            + colored(f"llama stack run {template_path} --image-type {build_config.image_type}", "light_blue"),
+            color="green",
+            file=sys.stderr,
         )
         return template_path
     else:
diff --git a/llama_stack/cli/stack/remove.py b/llama_stack/cli/stack/remove.py
index be7c49a5d..a1796941e 100644
--- a/llama_stack/cli/stack/remove.py
+++ b/llama_stack/cli/stack/remove.py
@@ -58,8 +58,8 @@ class StackRemove(Subcommand):
         """Display available stacks in a table"""
         distributions = self._get_distribution_dirs()
         if not distributions:
-            print("No stacks found in ~/.llama/distributions")
-            return
+            cprint("No stacks found in ~/.llama/distributions", color="red", file=sys.stderr)
+            sys.exit(1)

         headers = ["Stack Name", "Path"]
         rows = [[name, str(path)] for name, path in distributions.items()]
@@ -71,19 +71,20 @@ class StackRemove(Subcommand):
         if args.all:
             confirm = input("Are you sure you want to delete ALL stacks? [yes-i-really-want/N] ").lower()
             if confirm != "yes-i-really-want":
-                print("Deletion cancelled.")
+                cprint("Deletion cancelled.", color="green", file=sys.stderr)
                 return

             for name, path in distributions.items():
                 try:
                     shutil.rmtree(path)
-                    print(f"Deleted stack: {name}")
+                    cprint(f"Deleted stack: {name}", color="green", file=sys.stderr)
                 except Exception as e:
                     cprint(
                         f"Failed to delete stack {name}: {e}",
                         color="red",
+                        file=sys.stderr,
                     )
-                    sys.exit(2)
+                    sys.exit(1)

         if not args.name:
             self._list_stacks()
@@ -95,22 +96,20 @@ class StackRemove(Subcommand):
             cprint(
                 f"Stack not found: {args.name}",
                 color="red",
+                file=sys.stderr,
             )
-            return
+            sys.exit(1)

         stack_path = distributions[args.name]

         confirm = input(f"Are you sure you want to delete stack '{args.name}'? [y/N] ").lower()
         if confirm != "y":
-            print("Deletion cancelled.")
+            cprint("Deletion cancelled.", color="green", file=sys.stderr)
             return

         try:
             shutil.rmtree(stack_path)
-            print(f"Successfully deleted stack: {args.name}")
+            cprint(f"Successfully deleted stack: {args.name}", color="green", file=sys.stderr)
         except Exception as e:
-            cprint(
-                f"Failed to delete stack {args.name}: {e}",
-                color="red",
-            )
-            sys.exit(2)
+            cprint(f"Failed to delete stack {args.name}: {e}", color="red", file=sys.stderr)
+            sys.exit(1)
diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py
index 3e9dc2028..072f9c425 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/distribution/build.py
@@ -6,6 +6,7 @@

 import importlib.resources
 import logging
+import sys
 from pathlib import Path

 from pydantic import BaseModel
@@ -95,10 +96,11 @@ def print_pip_install_help(config: BuildConfig):

     cprint(
         f"Please install needed dependencies using the following commands:\n\nuv pip install {' '.join(normal_deps)}",
-        "yellow",
+        color="yellow",
+        file=sys.stderr,
     )
     for special_dep in special_deps:
-        cprint(f"uv pip install {special_dep}", "yellow")
+        cprint(f"uv pip install {special_dep}", color="yellow", file=sys.stderr)
     print()


diff --git a/llama_stack/distribution/client.py b/llama_stack/distribution/client.py
index 9fde8a157..03e4fb051 100644
--- a/llama_stack/distribution/client.py
+++ b/llama_stack/distribution/client.py
@@ -6,6 +6,7 @@

 import inspect
 import json
+import sys
 from collections.abc import AsyncIterator
 from enum import Enum
 from typing import Any, Union, get_args, get_origin
@@ -96,13 +97,13 @@ def create_api_client_class(protocol) -> type:
                     try:
                         data = json.loads(data)
                         if "error" in data:
-                            cprint(data, "red")
+                            cprint(data, color="red", file=sys.stderr)
                             continue
                         yield parse_obj_as(return_type, data)
                     except Exception as e:
-                        print(f"Error with parsing or validation: {e}")
-                        print(data)
+                        cprint(f"Error with parsing or validation: {e}", color="red", file=sys.stderr)
+                        cprint(data, color="red", file=sys.stderr)


     def httpx_request_params(self, method_name: str, *args, **kwargs) -> dict:
         webmethod, sig = self.routes[method_name]
diff --git a/llama_stack/distribution/library_client.py b/llama_stack/distribution/library_client.py
index 21b49a975..3cd2d1728 100644
--- a/llama_stack/distribution/library_client.py
+++ b/llama_stack/distribution/library_client.py
@@ -9,6 +9,7 @@ import inspect
 import json
 import logging
 import os
+import sys
 from concurrent.futures import ThreadPoolExecutor
 from enum import Enum
 from pathlib import Path
@@ -210,10 +211,11 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
             self.endpoint_impls = None
             self.impls = await construct_stack(self.config, self.custom_provider_registry)
         except ModuleNotFoundError as _e:
-            cprint(_e.msg, "red")
+            cprint(_e.msg, color="red", file=sys.stderr)
             cprint(
                 "Using llama-stack as a library requires installing dependencies depending on the template (providers) you choose.\n",
-                "yellow",
+                color="yellow",
+                file=sys.stderr,
             )
             if self.config_path_or_template_name.endswith(".yaml"):
                 # Convert Provider objects to their types
@@ -234,7 +236,13 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 cprint(
                     f"Please run:\n\n{prefix}llama stack build --template {self.config_path_or_template_name} --image-type venv\n\n",
                     "yellow",
+                    file=sys.stderr,
                 )
+            cprint(
+                "Please check your internet connection and try again.",
+                "red",
+                file=sys.stderr,
+            )
             raise _e

         if Api.telemetry in self.impls:
diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py
index 4acce4f5b..7c2e00524 100644
--- a/llama_stack/distribution/utils/exec.py
+++ b/llama_stack/distribution/utils/exec.py
@@ -8,6 +8,7 @@ import logging
 import os
 import signal
 import subprocess
+import sys

 from termcolor import cprint

@@ -33,6 +34,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
             cprint(
                 "No current conda environment detected, please specify a conda environment name with --image-name",
                 color="red",
+                file=sys.stderr,
             )
             return

@@ -49,12 +51,13 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
                     return envpath
             return None

-        print(f"Using conda environment: {env_name}")
+        cprint(f"Using conda environment: {env_name}", color="green", file=sys.stderr)
         conda_prefix = get_conda_prefix(env_name)
         if not conda_prefix:
             cprint(
                 f"Conda environment {env_name} does not exist.",
                 color="red",
+                file=sys.stderr,
             )
             return

@@ -63,6 +66,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
             cprint(
                 f"Build file {build_file} does not exist.\n\nPlease run `llama stack build` or specify the correct conda environment name with --image-name",
                 color="red",
+                file=sys.stderr,
             )
             return
     else:
@@ -73,9 +77,10 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list:
             cprint(
                 "No current virtual environment detected, please specify a virtual environment name with --image-name",
                 color="red",
+                file=sys.stderr,
             )
             return
-        print(f"Using virtual environment: {env_name}")
+        cprint(f"Using virtual environment: {env_name}", file=sys.stderr)

     script = importlib.resources.files("llama_stack") / "distribution/start_stack.sh"
     run_args = [
diff --git a/llama_stack/log.py b/llama_stack/log.py
index 98858d208..f4184710a 100644
--- a/llama_stack/log.py
+++ b/llama_stack/log.py
@@ -6,6 +6,7 @@

 import logging
 import os
+import sys
 from logging.config import dictConfig

 from rich.console import Console
@@ -234,7 +235,7 @@ def get_logger(

     env_config = os.environ.get("LLAMA_STACK_LOGGING", "")
     if env_config:
-        cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", "yellow")
+        cprint(f"Environment variable LLAMA_STACK_LOGGING found: {env_config}", color="yellow", file=sys.stderr)
         _category_levels.update(parse_environment_config(env_config))

     log_file = os.environ.get("LLAMA_STACK_LOG_FILE")
diff --git a/llama_stack/models/llama/llama3/generation.py b/llama_stack/models/llama/llama3/generation.py
index c6d618818..fe7be5ea9 100644
--- a/llama_stack/models/llama/llama3/generation.py
+++ b/llama_stack/models/llama/llama3/generation.py
@@ -174,6 +174,7 @@ class Llama3:
             cprint(
                 "Input to model:\n" + self.tokenizer.decode(tokens_to_print) + "\n",
                 "red",
+                file=sys.stderr,
             )

         prompt_tokens = [inp.tokens for inp in llm_inputs]
@@ -184,7 +185,11 @@ class Llama3:
         max_prompt_len = max(len(t) for t in prompt_tokens)

         if max_prompt_len >= params.max_seq_len:
-            cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", "red")
+            cprint(
+                f"Out of token budget {max_prompt_len} vs {params.max_seq_len}",
+                color="red",
+                file=sys.stderr,
+            )
             return

         total_len = min(max_gen_len + max_prompt_len, params.max_seq_len)
diff --git a/llama_stack/models/llama/llama4/generation.py b/llama_stack/models/llama/llama4/generation.py
index 476761209..6132d25d4 100644
--- a/llama_stack/models/llama/llama4/generation.py
+++ b/llama_stack/models/llama/llama4/generation.py
@@ -133,9 +133,9 @@ class Llama4:

         print_model_input = print_model_input or os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1"
         if print_model_input:
-            cprint("Input to model:\n", "yellow")
+            cprint("Input to model:\n", color="yellow", file=sys.stderr)
             for inp in llm_inputs:
-                cprint(self.tokenizer.decode(inp.tokens), "grey")
+                cprint(self.tokenizer.decode(inp.tokens), color="grey", file=sys.stderr)

         prompt_tokens = [inp.tokens for inp in llm_inputs]
         bsz = len(llm_inputs)
@@ -145,7 +145,7 @@ class Llama4:
         max_prompt_len = max(len(t) for t in prompt_tokens)

         if max_prompt_len >= params.max_seq_len:
-            cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", "red")
+            cprint(f"Out of token budget {max_prompt_len} vs {params.max_seq_len}", color="red", file=sys.stderr)
             return

         total_len = min(max_gen_len + max_prompt_len, params.max_seq_len)
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py
index 048336f9e..e238e1b78 100644
--- a/llama_stack/providers/inline/inference/meta_reference/inference.py
+++ b/llama_stack/providers/inline/inference/meta_reference/inference.py
@@ -6,6 +6,7 @@

 import asyncio
 import os
+import sys
 from collections.abc import AsyncGenerator

 from pydantic import BaseModel
@@ -455,9 +456,9 @@ class MetaReferenceInferenceImpl(
                 first = token_results[0]
                 if not first.finished and not first.ignore_token:
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") in ("1", "2"):
-                        cprint(first.text, "cyan", end="")
+                        cprint(first.text, color="cyan", end="", file=sys.stderr)
                     if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-                        cprint(f"<{first.token}>", "magenta", end="")
+                        cprint(f"<{first.token}>", color="magenta", end="", file=sys.stderr)

                 for result in token_results:
                     idx = result.batch_idx
@@ -519,9 +520,9 @@ class MetaReferenceInferenceImpl(
             for token_results in self.generator.chat_completion([request]):
                 token_result = token_results[0]
                 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "1":
-                    cprint(token_result.text, "cyan", end="")
+                    cprint(token_result.text, color="cyan", end="", file=sys.stderr)
                 if os.environ.get("LLAMA_MODELS_DEBUG", "0") == "2":
-                    cprint(f"<{token_result.token}>", "magenta", end="")
+                    cprint(f"<{token_result.token}>", color="magenta", end="", file=sys.stderr)

                 if token_result.token == tokenizer.eot_id:
                     stop_reason = StopReason.end_of_turn
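The change applied throughout this patch is mechanical: every user-facing print/cprint status message gains an explicit color= keyword and file=sys.stderr, so stdout stays free for machine-readable output. Below is a minimal standalone sketch of that pattern; the function name, message, and path are illustrative only and do not come from the patch.

# Sketch of the stderr-reporting pattern used in this patch (illustrative, not repo code).
# Human-facing, colored status text goes to stderr via cprint(..., file=sys.stderr),
# while stdout is reserved for output a caller might capture or pipe.
import sys

from termcolor import cprint


def report_download(output_dir: str) -> None:
    # Colored status line for the user; lands on stderr, not stdout.
    cprint(f"Successfully downloaded model to {output_dir}", color="green", file=sys.stderr)
    # Plain result on stdout, safe to consume with a pipe or command substitution.
    print(output_dir)


if __name__ == "__main__":
    report_download("/tmp/example-model")

Because termcolor's cprint forwards its extra keyword arguments to the builtin print, file= and end= behave exactly as they do for print itself, which is what the diff relies on.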