chore!: remove model mgmt from CLI for Hugging Face CLI (#3700)

This change removes the `llama model` and `llama download` subcommands from the CLI, replacing them with recommendations to use the Hugging Face CLI instead.

Rationale for this change:
- The model management functionality largely duplicated what the Hugging Face CLI already provides, leading to unnecessary maintenance overhead (the one possible exception being Meta as a download source)
- Maintaining our own implementation required fixing bugs and keeping up with changes in model repositories and download mechanisms
- The Hugging Face CLI is more mature, widely adopted, and better maintained
- This allows us to focus on the core Llama Stack functionality rather than reimplementing model management tools

Changes made:
- Removed all model-related CLI commands and their implementations
- Updated documentation to recommend using `huggingface-cli` for model downloads
- Removed Meta-specific download logic and statements
- Simplified the CLI to focus solely on stack management operations

Users should now use:
- `huggingface-cli download` for downloading models
- `huggingface-cli scan-cache` for listing downloaded models

This is a breaking change, as it removes previously available CLI commands.

Signed-off-by: Sébastien Han <seb@redhat.com>
2025-10-13 06:07:58 +00:00 · 2025-10-10 01:50:33 +02:00 · 2025-10-10 01:50:33 +02:00 · 7ee0ee7843
commit 7ee0ee7843
parent 841d0c3583
21 changed files with 63 additions and 1612 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,14 +25,13 @@ classifiers = [
 ]
 dependencies = [
    "aiohttp",
-    "fastapi>=0.115.0,<1.0",                  # server
-    "fire",                                   # for MCP in LLS client
+    "fastapi>=0.115.0,<1.0",                          # server
+    "fire",                                           # for MCP in LLS client
    "httpx",
-    "huggingface-hub>=0.34.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.2.23",
-    "openai>=1.107",                                # for expires_after support
+    "openai>=1.107",                                  # for expires_after support
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
@@ -43,13 +42,13 @@ dependencies = [
    "tiktoken",
    "pillow",
    "h11>=0.16.0",
-    "python-multipart>=0.0.20",               # For fastapi Form
-    "uvicorn>=0.34.0",                        # server
-    "opentelemetry-sdk>=1.30.0",              # server
+    "python-multipart>=0.0.20",                       # For fastapi Form
+    "uvicorn>=0.34.0",                                # server
+    "opentelemetry-sdk>=1.30.0",                      # server
    "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
-    "aiosqlite>=0.21.0",                      # server - for metadata store
-    "asyncpg",                                # for metadata store
-    "sqlalchemy[asyncio]>=2.0.41",           # server - for conversations
+    "aiosqlite>=0.21.0",                              # server - for metadata store
+    "asyncpg",                                        # for metadata store
+    "sqlalchemy[asyncio]>=2.0.41",                    # server - for conversations
 ]

 [project.optional-dependencies]
@@ -68,14 +67,14 @@ dev = [
    "pytest-cov",
    "pytest-html",
    "pytest-json-report",
-    "pytest-socket", # For blocking network access in unit tests
-    "nbval", # For notebook testing
+    "pytest-socket",       # For blocking network access in unit tests
+    "nbval",               # For notebook testing
    "black",
    "ruff",
    "types-requests",
    "types-setuptools",
    "pre-commit",
-    "ruamel.yaml", # needed for openapi generator
+    "ruamel.yaml",         # needed for openapi generator
 ]
 # These are the dependencies required for running unit tests.
 unit = [
@@ -141,9 +140,7 @@ docs = [
    "requests",
 ]
 codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"]
-benchmark = [
-    "locust>=2.39.1",
-]
+benchmark = ["locust>=2.39.1"]

 [project.urls]
 Homepage = "https://github.com/llamastack/llama-stack"
@@ -242,7 +239,6 @@ follow_imports = "silent"
 # to exclude the entire directory.
 exclude = [
    # As we fix more and more of these, we should remove them from the list
-    "^llama_stack/cli/download\\.py$",
    "^llama_stack.core/build\\.py$",
    "^llama_stack.core/client\\.py$",
    "^llama_stack.core/request_headers\\.py$",
@@ -332,6 +328,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"]
 [tool.pytest.ini_options]
 addopts = ["--durations=10"]
 asyncio_mode = "auto"
-markers = [
-    "allow_network: Allow network access for specific unit tests",
-]
+markers = ["allow_network: Allow network access for specific unit tests"]