diff --git a/docs/docs/distributions/importing_as_library.mdx b/docs/docs/distributions/importing_as_library.mdx
index cf626d2c7..33f65f290 100644
--- a/docs/docs/distributions/importing_as_library.mdx
+++ b/docs/docs/distributions/importing_as_library.mdx
@@ -11,7 +11,7 @@ If you are planning to use an external service for Inference (even Ollama or TGI
 This avoids the overhead of setting up a server.
 
 ```bash
 # setup
-uv pip install llama-stack
+uv pip install llama-stack llama-stack-client
 llama stack list-deps starter | xargs -L1 uv pip install
 ```
diff --git a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
index 51604f6d1..899216d7a 100644
--- a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
+++ b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
@@ -37,7 +37,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack\n",
+    "!pip install -U llama-stack llama-stack-client\n",
     "llama stack list-deps fireworks | xargs -L1 uv pip install\n"
    ]
   },
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 94af24258..e78e776fc 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -44,7 +44,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack"
+    "!pip install -U llama-stack llama-stack-client"
    ]
   },
  {
diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
index 0ce9c6f5f..7bcafd3a1 100644
--- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
+++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
@@ -74,6 +74,7 @@
    "source": [
     "```bash\n",
     "uv sync --extra dev\n",
+    "uv pip install -U llama-stack-client\n",
     "uv pip install -e .\n",
     "source .venv/bin/activate\n",
     "```"
diff --git a/pyproject.toml b/pyproject.toml
index 8f07f9cbd..35f8afc05 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,6 @@ dependencies = [
     "httpx",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.3.0",
     "openai>=2.5.0",
     "prompt-toolkit",
     "python-dotenv",
diff --git a/src/llama_stack/__init__.py b/src/llama_stack/__init__.py
index 1c2ce7123..756f351d8 100644
--- a/src/llama_stack/__init__.py
+++ b/src/llama_stack/__init__.py
@@ -3,8 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-from llama_stack.core.library_client import (  # noqa: F401
-    AsyncLlamaStackAsLibraryClient,
-    LlamaStackAsLibraryClient,
-)
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index 6203b529e..42d5d279c 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -18,14 +18,21 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
-from llama_stack_client import (
-    NOT_GIVEN,
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-)
+
+try:
+    from llama_stack_client import (
+        NOT_GIVEN,
+        APIResponse,
+        AsyncAPIResponse,
+        AsyncLlamaStackClient,
+        AsyncStream,
+        LlamaStackClient,
+    )
+except ImportError as e:
+    raise ImportError(
+        "llama-stack-client is not installed. Please install it with `pip install llama-stack-client`."
+    ) from e
+
 from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
index da57d5550..7b9f39141 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/README.md
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
@@ -20,6 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
index 97fa95a1f..d3bdc4fb7 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -18,6 +18,7 @@ This provider enables running inference using NVIDIA NIM.
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
@@ -199,4 +200,4 @@ rerank_response = client.alpha.inference.rerank(
 
 for i, result in enumerate(rerank_response):
     print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
-```
\ No newline at end of file
+```
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
index 789514b1e..83f20a44e 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/README.md
+++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md
@@ -22,6 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md
index e589afe84..af11b2539 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/README.md
+++ b/src/llama_stack/providers/remote/safety/nvidia/README.md
@@ -19,6 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
 
 Build the NVIDIA environment:
 
 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/uv.lock b/uv.lock
index de1c8879c..ef76142a6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1945,7 +1945,6 @@ dependencies = [
     { name = "httpx" },
     { name = "jinja2" },
     { name = "jsonschema" },
-    { name = "llama-stack-client" },
     { name = "openai" },
     { name = "opentelemetry-exporter-otlp-proto-http" },
     { name = "opentelemetry-sdk" },
@@ -2096,7 +2095,6 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
-    { name = "llama-stack-client", specifier = ">=0.3.0" },
    { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
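
With `llama-stack-client` dropped from the core dependencies and the top-level re-export removed from `llama_stack/__init__.py`, library-mode users now install the client explicitly and import from `llama_stack.core.library_client`. A minimal sketch of the resulting flow, following the pattern in `docs/docs/distributions/importing_as_library.mdx`; the `starter` distribution name and the `models.list()` call are illustrative, not part of this patch:

```python
# Sketch only. Assumes the setup from the docs above has been run:
#   uv pip install llama-stack llama-stack-client
#   llama stack list-deps starter | xargs -L1 uv pip install
#
# The top-level re-export is gone, so import from the core module directly.
# If llama-stack-client is missing, this import raises the ImportError added
# in library_client.py above, pointing at `pip install llama-stack-client`.
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")
client.initialize()  # builds the stack in-process; no server needed

# Client calls now run in-process, e.g. listing the registered models.
for model in client.models.list():
    print(model.identifier)
```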