diff --git a/docs/docs/distributions/importing_as_library.mdx b/docs/docs/distributions/importing_as_library.mdx
index cf626d2c7..33f65f290 100644
--- a/docs/docs/distributions/importing_as_library.mdx
+++ b/docs/docs/distributions/importing_as_library.mdx
@@ -11,7 +11,7 @@ If you are planning to use an external service for Inference (even Ollama or TGI
 This avoids the overhead of setting up a server.
 
 ```bash
 # setup
-uv pip install llama-stack
+uv pip install llama-stack llama-stack-client
 llama stack list-deps starter | xargs -L1 uv pip install
 ```
diff --git a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
index 51604f6d1..899216d7a 100644
--- a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
+++ b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
@@ -37,7 +37,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack\n",
+    "!pip install -U llama-stack llama-stack-client\n",
     "llama stack list-deps fireworks | xargs -L1 uv pip install\n"
    ]
   },
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 94af24258..e78e776fc 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -44,7 +44,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack"
+    "!pip install -U llama-stack llama-stack-client"
    ]
   },
  {
diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
index 0ce9c6f5f..7bcafd3a1 100644
--- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
+++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
@@ -74,6 +74,7 @@
    "source": [
     "```bash\n",
     "uv sync --extra dev\n",
+    "uv pip install -U llama-stack-client\n",
     "uv pip install -e .\n",
     "source .venv/bin/activate\n",
     "```"
diff --git a/pyproject.toml b/pyproject.toml
index 8f07f9cbd..35f8afc05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,6 @@ dependencies = [
     "httpx",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.3.0",
     "openai>=2.5.0",
     "prompt-toolkit",
     "python-dotenv",
diff --git a/src/llama_stack/__init__.py b/src/llama_stack/__init__.py
index 1c2ce7123..756f351d8 100644
--- a/src/llama_stack/__init__.py
+++ b/src/llama_stack/__init__.py
@@ -3,8 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-from llama_stack.core.library_client import (  # noqa: F401
-    AsyncLlamaStackAsLibraryClient,
-    LlamaStackAsLibraryClient,
-)
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index 6203b529e..42d5d279c 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -18,14 +18,21 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
-from llama_stack_client import (
-    NOT_GIVEN,
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-)
+
+try:
+    from llama_stack_client import (
+        NOT_GIVEN,
+        APIResponse,
+        AsyncAPIResponse,
+        AsyncLlamaStackClient,
+        AsyncStream,
+        LlamaStackClient,
+    )
+except ImportError as e:
+    raise ImportError(
+        "llama-stack-client is not installed. Please install it with `pip install llama-stack-client`."
+    ) from e
+
 from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
index da57d5550..7b9f39141 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/README.md
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
@@ -20,6 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
index 97fa95a1f..d3bdc4fb7 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -18,6 +18,7 @@ This provider enables running inference using NVIDIA NIM.
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
@@ -199,4 +200,4 @@ rerank_response = client.alpha.inference.rerank(
 
 for i, result in enumerate(rerank_response):
     print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
-```
\ No newline at end of file
+```
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
index 789514b1e..83f20a44e 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/README.md
+++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md
@@ -22,6 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md
index e589afe84..af11b2539 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/README.md
+++ b/src/llama_stack/providers/remote/safety/nvidia/README.md
@@ -19,6 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/uv.lock b/uv.lock
index de1c8879c..ef76142a6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1945,7 +1945,6 @@ dependencies = [
     { name = "httpx" },
     { name = "jinja2" },
     { name = "jsonschema" },
-    { name = "llama-stack-client" },
     { name = "openai" },
     { name = "opentelemetry-exporter-otlp-proto-http" },
     { name = "opentelemetry-sdk" },
@@ -2096,7 +2095,6 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
-    { name = "llama-stack-client", specifier = ">=0.3.0" },
    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
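
With `llama-stack-client` dropped from the core dependencies and the top-level re-export removed from `llama_stack/__init__.py`, library-mode users now install the client explicitly and import from `llama_stack.core.library_client`. A minimal sketch of the resulting flow, following the pattern in `docs/docs/distributions/importing_as_library.mdx`; the `starter` distribution name and the `models.list()` call are illustrative, not part of this patch:

```python
# Sketch only. Assumes the setup from the docs above has been run:
#   uv pip install llama-stack llama-stack-client
#   llama stack list-deps starter | xargs -L1 uv pip install
#
# The top-level re-export is gone, so import from the core module directly.
# If llama-stack-client is missing, this import raises the ImportError added
# in library_client.py above, pointing at `pip install llama-stack-client`.
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")
client.initialize()  # builds the stack in-process; no server needed

# Client calls now run in-process, e.g. listing the registered models.
for model in client.models.list():
    print(model.identifier)
```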