llama-stack/pyproject.toml
Ashwin Bharambe 530d4bdfe1
refactor: move all llama code to models/llama out of meta reference (#1887)
# What does this PR do?

Move around bits. This makes the copies from llama-models _much_ easier
to maintain and ensures we don't entangle meta-reference specific
tidbits into llama-models code even by accident.

Also, kills the meta-reference-quantized-gpu distro and rolls
quantization deps into meta-reference-gpu.

## Test Plan

```
LLAMA_MODELS_DEBUG=1 \
  with-proxy llama stack run meta-reference-gpu \
  --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct \
   --env INFERENCE_CHECKPOINT_DIR=<DIR> \
   --env MODEL_PARALLEL_SIZE=4 \
   --env QUANTIZATION_TYPE=fp8_mixed
```

Start a server with and without quantization. Point integration tests to
it using:

```
pytest -s -v  tests/integration/inference/test_text_inference.py \
   --stack-config http://localhost:8321 --text-model meta-llama/Llama-4-Scout-17B-16E-Instruct
```
2025-04-07 15:03:58 -07:00

315 lines
12 KiB
TOML

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "llama_stack"
version = "0.2.1"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
requires-python = ">=3.10"
license = { "text" = "MIT" }
classifiers = [
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: Science/Research",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Scientific/Engineering :: Information Analysis",
]
dependencies = [
"blobfile",
"fire",
"httpx",
"huggingface-hub",
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.1",
"prompt-toolkit",
"python-dotenv",
"pydantic>=2",
"requests",
"rich",
"setuptools",
"termcolor",
"tiktoken",
"pillow",
]
[project.optional-dependencies]
dev = [
"pytest",
"pytest-asyncio",
"pytest-cov",
"pytest-html",
"nbval", # For notebook testing
"black",
"ruff",
"types-requests",
"types-setuptools",
"pre-commit",
"uvicorn",
"fastapi",
"ruamel.yaml", # needed for openapi generator
]
# These are the dependencies required for running unit tests.
unit = ["sqlite-vec", "openai", "aiosqlite", "aiohttp", "pypdf", "chardet", "qdrant-client"]
# These are the core dependencies required for running integration tests. They are shared across all
# providers. If a provider requires additional dependencies, please add them to your environment
# separately. If you are using "uv" to execute your tests, you can use the "--with" flag to specify extra
# dependencies.
test = [
"openai",
"aiosqlite",
"aiohttp",
"torch>=2.6.0",
"torchvision>=0.21.0",
"opentelemetry-sdk",
"opentelemetry-exporter-otlp-proto-http",
"chardet",
"pypdf",
"mcp",
"datasets",
"autoevals",
]
docs = [
"sphinx-autobuild",
"myst-parser",
"sphinx-rtd-theme",
"sphinx_rtd_dark_mode",
"sphinx-copybutton",
"sphinx-tabs",
"sphinx-design",
"sphinxcontrib.redoc",
"sphinxcontrib.video",
"sphinxcontrib.mermaid",
"tomli",
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
[project.urls]
Homepage = "https://github.com/meta-llama/llama-stack"
[project.scripts]
llama = "llama_stack.cli.llama:main"
install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
[tool.setuptools]
packages = { find = {} }
license-files = []
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
[tool.ruff]
line-length = 120
exclude = [
"./.git",
"./docs/*",
"./build",
"./venv",
"*.pyi",
".pre-commit-config.yaml",
"*.md",
".flake8",
]
[tool.ruff.lint]
select = [
"B", # flake8-bugbear
"B9", # flake8-bugbear subset
"C", # comprehensions
"E", # pycodestyle
"F", # Pyflakes
"N", # Naming
"W", # Warnings
"DTZ", # datetime rules
"I", # isort (imports order)
]
ignore = [
# The following ignores are desired by the project maintainers.
"E402", # Module level import not at top of file
"E501", # Line too long
"F405", # Maybe undefined or defined from star import
"C408", # Ignored because we like the dict keyword argument syntax
"N812", # Ignored because import torch.nn.functional as F is PyTorch convention
# These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
"C901", # Complexity of the function is too high
]
# Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
[tool.mypy]
mypy_path = ["llama_stack"]
packages = ["llama_stack"]
plugins = ['pydantic.mypy']
disable_error_code = []
warn_return_any = true
# # honor excludes by not following there through imports
follow_imports = "silent"
# Note: some entries are directories, not files. This is because mypy doesn't
# respect __init__.py excludes, so the only way to suppress these right now is
# to exclude the entire directory.
exclude = [
# As we fix more and more of these, we should remove them from the list
"^llama_stack/apis/agents/agents\\.py$",
"^llama_stack/apis/batch_inference/batch_inference\\.py$",
"^llama_stack/apis/benchmarks/benchmarks\\.py$",
"^llama_stack/apis/common/content_types\\.py$",
"^llama_stack/apis/common/training_types\\.py$",
"^llama_stack/apis/datasetio/datasetio\\.py$",
"^llama_stack/apis/datasets/datasets\\.py$",
"^llama_stack/apis/eval/eval\\.py$",
"^llama_stack/apis/files/files\\.py$",
"^llama_stack/apis/inference/inference\\.py$",
"^llama_stack/apis/inspect/inspect\\.py$",
"^llama_stack/apis/models/models\\.py$",
"^llama_stack/apis/post_training/post_training\\.py$",
"^llama_stack/apis/providers/providers\\.py$",
"^llama_stack/apis/resource\\.py$",
"^llama_stack/apis/safety/safety\\.py$",
"^llama_stack/apis/scoring/scoring\\.py$",
"^llama_stack/apis/scoring_functions/scoring_functions\\.py$",
"^llama_stack/apis/shields/shields\\.py$",
"^llama_stack/apis/synthetic_data_generation/synthetic_data_generation\\.py$",
"^llama_stack/apis/telemetry/telemetry\\.py$",
"^llama_stack/apis/tools/rag_tool\\.py$",
"^llama_stack/apis/tools/tools\\.py$",
"^llama_stack/apis/vector_dbs/vector_dbs\\.py$",
"^llama_stack/apis/vector_io/vector_io\\.py$",
"^llama_stack/cli/download\\.py$",
"^llama_stack/cli/llama\\.py$",
"^llama_stack/cli/stack/_build\\.py$",
"^llama_stack/cli/stack/list_providers\\.py$",
"^llama_stack/distribution/build\\.py$",
"^llama_stack/distribution/client\\.py$",
"^llama_stack/distribution/configure\\.py$",
"^llama_stack/distribution/library_client\\.py$",
"^llama_stack/distribution/request_headers\\.py$",
"^llama_stack/distribution/routers/",
"^llama_stack/distribution/server/endpoints\\.py$",
"^llama_stack/distribution/server/server\\.py$",
"^llama_stack/distribution/server/websocket_server\\.py$",
"^llama_stack/distribution/stack\\.py$",
"^llama_stack/distribution/store/registry\\.py$",
"^llama_stack/distribution/ui/page/playground/chat\\.py$",
"^llama_stack/distribution/utils/exec\\.py$",
"^llama_stack/distribution/utils/prompt_for_config\\.py$",
"^llama_stack/models/llama/datatypes\\.py$",
"^llama_stack/models/llama/llama3/chat_format\\.py$",
"^llama_stack/models/llama/llama3/interface\\.py$",
"^llama_stack/models/llama/llama3/prompt_templates/system_prompts\\.py$",
"^llama_stack/models/llama/llama3/tokenizer\\.py$",
"^llama_stack/models/llama/llama3/tool_utils\\.py$",
"^llama_stack/models/llama/llama3_3/prompts\\.py$",
"^llama_stack/models/llama/llama4/",
"^llama_stack/models/llama/sku_list\\.py$",
"^llama_stack/providers/inline/agents/meta_reference/",
"^llama_stack/providers/inline/agents/meta_reference/agent_instance\\.py$",
"^llama_stack/providers/inline/agents/meta_reference/agents\\.py$",
"^llama_stack/providers/inline/agents/meta_reference/safety\\.py$",
"^llama_stack/providers/inline/datasetio/localfs/",
"^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/config\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^llama_stack/models/llama/llama3/generation\\.py$",
"^llama_stack/models/llama/llama3/multimodal/model\\.py$",
"^llama_stack/models/llama/llama4/",
"^llama_stack/providers/inline/inference/meta_reference/parallel_utils\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/quantization/loader\\.py$",
"^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^llama_stack/providers/inline/inference/vllm/",
"^llama_stack/providers/inline/post_training/common/validator\\.py$",
"^llama_stack/providers/inline/post_training/torchtune/post_training\\.py$",
"^llama_stack/providers/inline/safety/code_scanner/",
"^llama_stack/providers/inline/safety/llama_guard/",
"^llama_stack/providers/inline/safety/prompt_guard/",
"^llama_stack/providers/inline/scoring/basic/",
"^llama_stack/providers/inline/scoring/braintrust/",
"^llama_stack/providers/inline/scoring/llm_as_judge/",
"^llama_stack/providers/remote/agents/sample/",
"^llama_stack/providers/remote/datasetio/huggingface/",
"^llama_stack/providers/remote/inference/anthropic/",
"^llama_stack/providers/remote/inference/bedrock/",
"^llama_stack/providers/remote/inference/cerebras/",
"^llama_stack/providers/remote/inference/databricks/",
"^llama_stack/providers/remote/inference/fireworks/",
"^llama_stack/providers/remote/inference/gemini/",
"^llama_stack/providers/remote/inference/groq/",
"^llama_stack/providers/remote/inference/nvidia/",
"^llama_stack/providers/remote/inference/openai/",
"^llama_stack/providers/remote/inference/passthrough/",
"^llama_stack/providers/remote/inference/runpod/",
"^llama_stack/providers/remote/inference/sambanova/",
"^llama_stack/providers/remote/inference/sample/",
"^llama_stack/providers/remote/inference/tgi/",
"^llama_stack/providers/remote/inference/together/",
"^llama_stack/providers/remote/safety/bedrock/",
"^llama_stack/providers/remote/safety/nvidia/",
"^llama_stack/providers/remote/safety/sample/",
"^llama_stack/providers/remote/tool_runtime/bing_search/",
"^llama_stack/providers/remote/tool_runtime/brave_search/",
"^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
"^llama_stack/providers/remote/tool_runtime/tavily_search/",
"^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
"^llama_stack/providers/remote/post_training/nvidia/",
"^llama_stack/providers/remote/vector_io/chroma/",
"^llama_stack/providers/remote/vector_io/milvus/",
"^llama_stack/providers/remote/vector_io/pgvector/",
"^llama_stack/providers/remote/vector_io/qdrant/",
"^llama_stack/providers/remote/vector_io/sample/",
"^llama_stack/providers/remote/vector_io/weaviate/",
"^llama_stack/providers/tests/conftest\\.py$",
"^llama_stack/providers/utils/bedrock/client\\.py$",
"^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
"^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
"^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
"^llama_stack/providers/utils/inference/model_registry\\.py$",
"^llama_stack/providers/utils/inference/openai_compat\\.py$",
"^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
"^llama_stack/providers/utils/kvstore/config\\.py$",
"^llama_stack/providers/utils/kvstore/kvstore\\.py$",
"^llama_stack/providers/utils/kvstore/mongodb/mongodb\\.py$",
"^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
"^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
"^llama_stack/providers/utils/kvstore/sqlite/sqlite\\.py$",
"^llama_stack/providers/utils/memory/vector_store\\.py$",
"^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
"^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
"^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
"^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
"^llama_stack/providers/utils/telemetry/tracing\\.py$",
"^llama_stack/scripts/",
"^llama_stack/strong_typing/auxiliary\\.py$",
"^llama_stack/strong_typing/deserializer\\.py$",
"^llama_stack/strong_typing/inspection\\.py$",
"^llama_stack/strong_typing/schema\\.py$",
"^llama_stack/strong_typing/serializer\\.py$",
"^llama_stack/templates/dev/dev\\.py$",
"^llama_stack/templates/groq/groq\\.py$",
"^llama_stack/templates/sambanova/sambanova\\.py$",
"^llama_stack/templates/template\\.py$",
]
[[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable.
module = ["yaml", "fire"]
ignore_missing_imports = true
[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true