diff --git a/docs/docs/distributions/importing_as_library.mdx b/docs/docs/distributions/importing_as_library.mdx
index cf626d2c7..33f65f290 100644
--- a/docs/docs/distributions/importing_as_library.mdx
+++ b/docs/docs/distributions/importing_as_library.mdx
@@ -11,7 +11,7 @@ If you are planning to use an external service for Inference (even Ollama or TGI
 This avoids the overhead of setting up a server.
 ```bash
 # setup
-uv pip install llama-stack
+uv pip install llama-stack llama-stack-client
 llama stack list-deps starter | xargs -L1 uv pip install
 ```

diff --git a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
index 51604f6d1..899216d7a 100644
--- a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
+++ b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
@@ -37,7 +37,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack\n",
+    "!pip install -U llama-stack llama-stack-client\n",
     "llama stack list-deps fireworks | xargs -L1 uv pip install\n"
    ]
   },
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 94af24258..d51c0d39a 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -44,7 +44,7 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!pip install -U llama-stack"
+    "!pip install -U llama-stack llama-stack-client\n"
    ]
   },
  {
diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
index 0ce9c6f5f..7bcafd3a1 100644
--- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
+++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
@@ -74,6 +74,7 @@
    "source": [
     "```bash\n",
     "uv sync --extra dev\n",
+    "uv pip install -U llama-stack-client\n",
     "uv pip install -e .\n",
     "source .venv/bin/activate\n",
     "```"
diff --git a/pyproject.toml b/pyproject.toml
index f8577ad2b..4ec83249c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,13 +24,13 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Information Analysis",
 ]
 dependencies = [
+    "PyYAML>=6.0",
     "aiohttp",
     "fastapi>=0.115.0,<1.0",  # server
     "fire",  # for MCP in LLS client
     "httpx",
     "jinja2>=3.1.6",
     "jsonschema",
-    "llama-stack-client>=0.3.0",
     "openai>=2.5.0",
     "prompt-toolkit",
     "python-dotenv",
@@ -51,6 +51,11 @@ dependencies = [
     "sqlalchemy[asyncio]>=2.0.41",  # server - for conversations
 ]

+[project.optional-dependencies]
+client = [
+    "llama-stack-client>=0.3.0",  # Optional for library-only usage
+]
+
 [dependency-groups]
 dev = [
     "pytest>=8.4",
@@ -96,6 +101,7 @@ type_checking = [
     "lm-format-enforcer",
     "mcp",
     "ollama",
+    "llama-stack-client>=0.3.0",
 ]
 # These are the dependencies required for running unit tests.
 unit = [
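The pyproject.toml section above moves `llama-stack-client` out of the required dependencies and behind a new `client` extra. A minimal sketch of the two install paths this creates, assuming the diff is applied (the quotes only keep the shell from glob-expanding the brackets):

```bash
# Server-only install: llama-stack-client is no longer pulled in.
uv pip install llama-stack

# Library-mode install: opt back in to the client via the new extra.
uv pip install "llama-stack[client]"
```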
diff --git a/src/llama_stack/__init__.py b/src/llama_stack/__init__.py
index 1c2ce7123..756f351d8 100644
--- a/src/llama_stack/__init__.py
+++ b/src/llama_stack/__init__.py
@@ -3,8 +3,3 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-
-from llama_stack.core.library_client import (  # noqa: F401
-    AsyncLlamaStackAsLibraryClient,
-    LlamaStackAsLibraryClient,
-)
diff --git a/src/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
index 6203b529e..b8f9f715f 100644
--- a/src/llama_stack/core/library_client.py
+++ b/src/llama_stack/core/library_client.py
@@ -18,14 +18,21 @@ from typing import Any, TypeVar, Union, get_args, get_origin
 import httpx
 import yaml
 from fastapi import Response as FastAPIResponse
-from llama_stack_client import (
-    NOT_GIVEN,
-    APIResponse,
-    AsyncAPIResponse,
-    AsyncLlamaStackClient,
-    AsyncStream,
-    LlamaStackClient,
-)
+
+try:
+    from llama_stack_client import (
+        NOT_GIVEN,
+        APIResponse,
+        AsyncAPIResponse,
+        AsyncLlamaStackClient,
+        AsyncStream,
+        LlamaStackClient,
+    )
+except ImportError as e:
+    raise ImportError(
+        "llama-stack-client is not installed. Please install it with `uv pip install llama-stack[client]`."
+    ) from e
+
 from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
diff --git a/src/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
index da57d5550..7b9f39141 100644
--- a/src/llama_stack/providers/remote/datasetio/nvidia/README.md
+++ b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
@@ -20,6 +20,7 @@ This provider enables dataset management using NVIDIA's NeMo Customizer service.
 Build the NVIDIA environment:

 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
index 97fa95a1f..d3bdc4fb7 100644
--- a/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -18,6 +18,7 @@ This provider enables running inference using NVIDIA NIM.
 Build the NVIDIA environment:

 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
@@ -199,4 +200,4 @@ rerank_response = client.alpha.inference.rerank(

 for i, result in enumerate(rerank_response):
     print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
-```
\ No newline at end of file
+```
diff --git a/src/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
index 789514b1e..83f20a44e 100644
--- a/src/llama_stack/providers/remote/post_training/nvidia/README.md
+++ b/src/llama_stack/providers/remote/post_training/nvidia/README.md
@@ -22,6 +22,7 @@ This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service
 Build the NVIDIA environment:

 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/src/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md
index e589afe84..af11b2539 100644
--- a/src/llama_stack/providers/remote/safety/nvidia/README.md
+++ b/src/llama_stack/providers/remote/safety/nvidia/README.md
@@ -19,6 +19,7 @@ This provider enables safety checks and guardrails for LLM interactions using NV
 Build the NVIDIA environment:

 ```bash
+uv pip install llama-stack-client
 uv run llama stack list-deps nvidia | xargs -L1 uv pip install
 ```
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 67e459cc5..d5e4c15f7 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -20,8 +20,8 @@ import yaml
 from llama_stack_client import LlamaStackClient
 from openai import OpenAI

-from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.core.datatypes import VectorStoresConfig
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.core.stack import run_config_from_adhoc_config_spec
 from llama_stack.env import get_env_or_fail
diff --git a/tests/integration/inference/test_provider_data_routing.py b/tests/integration/inference/test_provider_data_routing.py
index 34ee2672f..99aa75395 100644
--- a/tests/integration/inference/test_provider_data_routing.py
+++ b/tests/integration/inference/test_provider_data_routing.py
@@ -16,7 +16,6 @@ from unittest.mock import AsyncMock, patch

 import pytest

-from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.apis.datatypes import Api
 from llama_stack.apis.inference.inference import (
     OpenAIAssistantMessageParam,
@@ -24,6 +23,7 @@ from llama_stack.apis.inference.inference import (
     OpenAIChatCompletionUsage,
     OpenAIChoice,
 )
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.core.telemetry.telemetry import MetricEvent
diff --git a/tests/integration/inference/test_tools_with_schemas.py b/tests/integration/inference/test_tools_with_schemas.py
index b144a5196..f30e9ece5 100644
--- a/tests/integration/inference/test_tools_with_schemas.py
+++ b/tests/integration/inference/test_tools_with_schemas.py
@@ -13,7 +13,7 @@ import json

 import pytest

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from llama_stack.models.llama.datatypes import ToolDefinition
 from tests.common.mcp import make_mcp_server
diff --git a/tests/integration/inspect/test_inspect.py b/tests/integration/inspect/test_inspect.py
index 8c62c85a1..3a62068c9 100644
--- a/tests/integration/inspect/test_inspect.py
+++ b/tests/integration/inspect/test_inspect.py
@@ -7,7 +7,7 @@
 import pytest
 from llama_stack_client import LlamaStackClient

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient


 class TestInspect:
diff --git a/tests/integration/providers/test_providers.py b/tests/integration/providers/test_providers.py
index fc65e2a10..959abf195 100644
--- a/tests/integration/providers/test_providers.py
+++ b/tests/integration/providers/test_providers.py
@@ -6,7 +6,7 @@

 from llama_stack_client import LlamaStackClient

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient


 class TestProviders:
diff --git a/tests/integration/responses/fixtures/fixtures.py b/tests/integration/responses/fixtures/fixtures.py
index 1783a5622..dbf67e138 100644
--- a/tests/integration/responses/fixtures/fixtures.py
+++ b/tests/integration/responses/fixtures/fixtures.py
@@ -11,7 +11,7 @@ import pytest
 import yaml
 from openai import OpenAI

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient

 # --- Helper Functions ---
diff --git a/tests/integration/responses/test_file_search.py b/tests/integration/responses/test_file_search.py
index e8d4fe359..dde5fd7f6 100644
--- a/tests/integration/responses/test_file_search.py
+++ b/tests/integration/responses/test_file_search.py
@@ -9,7 +9,7 @@ import time

 import pytest

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient

 from .helpers import new_vector_store, upload_file
diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py
index 3f1c35214..9bf58c6ff 100644
--- a/tests/integration/responses/test_tool_responses.py
+++ b/tests/integration/responses/test_tool_responses.py
@@ -12,8 +12,8 @@ import httpx
 import openai
 import pytest

-from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import dependency_tools, make_mcp_server

 from .fixtures.test_cases import (
diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py
index 59f558d2c..3a8fde37f 100644
--- a/tests/integration/tool_runtime/test_mcp.py
+++ b/tests/integration/tool_runtime/test_mcp.py
@@ -10,7 +10,7 @@ import pytest
 from llama_stack_client.lib.agents.agent import Agent
 from llama_stack_client.lib.agents.turn_events import StepCompleted, StepProgress, ToolCallIssuedDelta

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient

 AUTH_TOKEN = "test-token"
diff --git a/tests/integration/tool_runtime/test_mcp_json_schema.py b/tests/integration/tool_runtime/test_mcp_json_schema.py
index 240ec403a..def0b27b8 100644
--- a/tests/integration/tool_runtime/test_mcp_json_schema.py
+++ b/tests/integration/tool_runtime/test_mcp_json_schema.py
@@ -13,7 +13,7 @@ import json

 import pytest

-from llama_stack import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import make_mcp_server

 AUTH_TOKEN = "test-token"
diff --git a/tests/integration/tool_runtime/test_registration.py b/tests/integration/tool_runtime/test_registration.py
index c8c9cd046..4d532ed87 100644
--- a/tests/integration/tool_runtime/test_registration.py
+++ b/tests/integration/tool_runtime/test_registration.py
@@ -8,8 +8,8 @@ import re

 import pytest

-from llama_stack import LlamaStackAsLibraryClient
 from llama_stack.apis.common.errors import ToolGroupNotFoundError
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 from tests.common.mcp import MCP_TOOLGROUP_ID, make_mcp_server
diff --git a/uv.lock b/uv.lock
index b2e562abc..ba9a862a3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1945,7 +1945,6 @@ dependencies = [
     { name = "httpx" },
     { name = "jinja2" },
     { name = "jsonschema" },
-    { name = "llama-stack-client" },
     { name = "openai" },
     { name = "opentelemetry-exporter-otlp-proto-http" },
     { name = "opentelemetry-sdk" },
@@ -1955,6 +1954,7 @@ dependencies = [
     { name = "pyjwt", extra = ["crypto"] },
     { name = "python-dotenv" },
     { name = "python-multipart" },
+    { name = "pyyaml" },
     { name = "rich" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "starlette" },
@@ -1963,6 +1963,11 @@ dependencies = [
     { name = "uvicorn" },
 ]

+[package.optional-dependencies]
+client = [
+    { name = "llama-stack-client" },
+]
+
 [package.dev-dependencies]
 benchmark = [
     { name = "locust" },
@@ -2035,6 +2040,7 @@ type-checking = [
     { name = "datasets" },
     { name = "fairscale" },
     { name = "faiss-cpu" },
+    { name = "llama-stack-client" },
     { name = "lm-format-enforcer" },
     { name = "mcp" },
     { name = "nest-asyncio" },
@@ -2088,7 +2094,7 @@ requires-dist = [
     { name = "httpx" },
     { name = "jinja2", specifier = ">=3.1.6" },
     { name = "jsonschema" },
-    { name = "llama-stack-client", specifier = ">=0.3.0" },
+    { name = "llama-stack-client", marker = "extra == 'client'", specifier = ">=0.3.0" },
     { name = "openai", specifier = ">=2.5.0" },
     { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
     { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
@@ -2098,6 +2104,7 @@ requires-dist = [
     { name = "pyjwt", extras = ["crypto"], specifier = ">=2.10.0" },
     { name = "python-dotenv" },
     { name = "python-multipart", specifier = ">=0.0.20" },
+    { name = "pyyaml", specifier = ">=6.0" },
     { name = "rich" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
     { name = "starlette" },
@@ -2105,6 +2112,7 @@ requires-dist = [
     { name = "tiktoken" },
     { name = "uvicorn", specifier = ">=0.34.0" },
 ]
+provides-extras = ["client"]

 [package.metadata.requires-dev]
 benchmark = [{ name = "locust", specifier = ">=2.39.1" }]
@@ -2175,6 +2183,7 @@ type-checking = [
     { name = "datasets" },
     { name = "fairscale" },
     { name = "faiss-cpu" },
+    { name = "llama-stack-client", specifier = ">=0.3.0" },
     { name = "lm-format-enforcer" },
     { name = "mcp" },
     { name = "nest-asyncio" },
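Taken together, these changes make the library client an opt-in import. A minimal sketch of library-mode usage after this diff, assuming the `client` extra is installed and a `starter` distribution is available; the `initialize()` call follows the existing library-client docs, and the model listing at the end is only an illustrative smoke test:

```python
# New import path: the `from llama_stack import LlamaStackAsLibraryClient`
# re-export was removed from src/llama_stack/__init__.py in this diff.
# If llama-stack-client is missing, this import raises an ImportError that
# points at `uv pip install llama-stack[client]`.
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")  # distribution name is illustrative
client.initialize()

# Illustrative smoke test over the standard client surface.
for model in client.models.list():
    print(model.identifier)
```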