chore!: remove model management from CLI in favor of Hugging Face CLI (#3700)

This change removes the `llama model` and `llama download` subcommands
from the CLI, replacing them with recommendations to use the Hugging
Face CLI instead.

Rationale for this change:
- The model management functionality largely duplicated what the
Hugging Face CLI already provides, leading to unnecessary maintenance
overhead (the one possible exception being downloads from Meta's own source)
- Maintaining our own implementation required fixing bugs and keeping up
with changes in model repositories and download mechanisms
- The Hugging Face CLI is more mature, widely adopted, and better
maintained
- This allows us to focus on the core Llama Stack functionality rather
than reimplementing model management tools

Changes made:
- Removed all model-related CLI commands and their implementations
- Updated documentation to recommend using `huggingface-cli` for model
downloads
- Removed Meta-specific download logic and statements
- Simplified the CLI to focus solely on stack management operations

Users should now use:
- `huggingface-cli download` for downloading models
- `huggingface-cli scan-cache` for listing downloaded models

This is a breaking change: it removes the previously available
`llama model` and `llama download` CLI commands.

Signed-off-by: Sébastien Han <seb@redhat.com>
This commit is contained in:
Sébastien Han 2025-10-10 01:50:33 +02:00 committed by GitHub
parent 841d0c3583
commit 7ee0ee7843
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 63 additions and 1612 deletions

View file

@ -25,14 +25,13 @@ classifiers = [
]
dependencies = [
"aiohttp",
"fastapi>=0.115.0,<1.0", # server
"fire", # for MCP in LLS client
"fastapi>=0.115.0,<1.0", # server
"fire", # for MCP in LLS client
"httpx",
"huggingface-hub>=0.34.0,<1.0",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.23",
"openai>=1.107", # for expires_after support
"openai>=1.107", # for expires_after support
"prompt-toolkit",
"python-dotenv",
"python-jose[cryptography]",
@ -43,13 +42,13 @@ dependencies = [
"tiktoken",
"pillow",
"h11>=0.16.0",
"python-multipart>=0.0.20", # For fastapi Form
"uvicorn>=0.34.0", # server
"opentelemetry-sdk>=1.30.0", # server
"python-multipart>=0.0.20", # For fastapi Form
"uvicorn>=0.34.0", # server
"opentelemetry-sdk>=1.30.0", # server
"opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
"aiosqlite>=0.21.0", # server - for metadata store
"asyncpg", # for metadata store
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations
"aiosqlite>=0.21.0", # server - for metadata store
"asyncpg", # for metadata store
"sqlalchemy[asyncio]>=2.0.41", # server - for conversations
]
[project.optional-dependencies]
@ -68,14 +67,14 @@ dev = [
"pytest-cov",
"pytest-html",
"pytest-json-report",
"pytest-socket", # For blocking network access in unit tests
"nbval", # For notebook testing
"pytest-socket", # For blocking network access in unit tests
"nbval", # For notebook testing
"black",
"ruff",
"types-requests",
"types-setuptools",
"pre-commit",
"ruamel.yaml", # needed for openapi generator
"ruamel.yaml", # needed for openapi generator
]
# These are the dependencies required for running unit tests.
unit = [
@ -141,9 +140,7 @@ docs = [
"requests",
]
codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"]
benchmark = [
"locust>=2.39.1",
]
benchmark = ["locust>=2.39.1"]
[project.urls]
Homepage = "https://github.com/llamastack/llama-stack"
@ -242,7 +239,6 @@ follow_imports = "silent"
# to exclude the entire directory.
exclude = [
# As we fix more and more of these, we should remove them from the list
"^llama_stack/cli/download\\.py$",
"^llama_stack.core/build\\.py$",
"^llama_stack.core/client\\.py$",
"^llama_stack.core/request_headers\\.py$",
@ -332,6 +328,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"]
[tool.pytest.ini_options]
addopts = ["--durations=10"]
asyncio_mode = "auto"
markers = [
"allow_network: Allow network access for specific unit tests",
]
markers = ["allow_network: Allow network access for specific unit tests"]