mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-19 19:30:05 +00:00
Some checks failed
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 5s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 6s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 2s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 7s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 9s
Update ReadTheDocs / update-readthedocs (push) Failing after 3s
Test Llama Stack Build / build-single-provider (push) Failing after 7s
Integration Tests / discover-tests (push) Successful in 13s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 13s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 15s
Integration Tests / test-matrix (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 12s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 19s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 19s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 22s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 17s
Test External Providers / test-external-providers (venv) (push) Failing after 17s
Test Llama Stack Build / build (push) Failing after 14s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 35s
Python Package Build Test / build (3.12) (push) Failing after 51s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 57s
Unit Tests / unit-tests (3.13) (push) Failing after 53s
Pre-commit / pre-commit (push) Successful in 1m42s
# What does this PR do? <!-- Provide a short summary of what this PR does and why. Link to relevant issues if applicable. --> The purpose of this task is to create a solution that can automatically detect when new models are added, deprecated, or removed by OpenAI and Llama API providers, and automatically update the list of supported models in LLamaStack. This feature is vitally important in order to avoid missing new models and editing the entries manually hence I created automation allowing users to dynamically register: - any models from OpenAI provider available at [https://api.openai.com/v1/models](https://api.openai.com/v1/models) that are not in [https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/openai/models.py](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/openai/models.py) - any models from Llama API provider available at [https://api.llama.com/v1/models](https://api.llama.com/v1/models) that are not in [https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/llama_openai_compat/models.py](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/llama_openai_compat/models.py) <!-- If resolving an issue, uncomment and update the line below --> <!-- Closes #[issue-number] --> Closes #2504 this PR is dependant on #2710 ## Test Plan <!-- Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.* --> 1. Create venv at root llamastack directory: `uv venv .venv --python 3.12 --seed` 2. Activate venv: `source .venv/bin/activate` 3. `uv pip install -e .` 4. Create OpenAI distro modifying run.yaml 5. Build distro: `llama stack build --template starter --image-type venv` 6. 
Then run LlamaStack, but before navigate to templates/starter folder: `llama stack run run.yaml --image-type venv OPENAI_API_KEY=<YOUR_KEY> ENABLE_OPENAI=openai` 7. Then try to register dummy llm that doesn't exist in OpenAI provider: ` llama-stack-client models register ianm/ianllm --provider-model-id=ianllm --provider-id=openai ` You should receive this output - combined list of static config + fetched available models from OpenAI: <img width="1380" height="474" alt="Screenshot 2025-07-14 at 12 48 50" src="https://github.com/user-attachments/assets/d26aad18-6b15-49ee-9c49-b01b2d33f883" /> 8. Then register real llm from OpenAI: llama-stack-client models register openai/gpt-4-turbo-preview --provider-model-id=gpt-4-turbo-preview --provider-id=openai <img width="1253" height="613" alt="Screenshot 2025-07-14 at 13 43 02" src="https://github.com/user-attachments/assets/60a5c9b1-3468-4eb9-9e92-cd7d21de3ca0" /> <img width="1288" height="655" alt="Screenshot 2025-07-14 at 13 43 11" src="https://github.com/user-attachments/assets/c1e48871-0e24-4bd9-a0b8-8c95552a51ee" /> We correctly fetched all available models from OpenAI As for Llama API, as a non-US person I don't have access to Llama API Key but I joined wait list. The implementation for Llama is the same as for OpenAI since Llama is openai compatible. So, the response from GET endpoint has the same structure as OpenAI https://llama.developer.meta.com/docs/api/models
352 lines
13 KiB
TOML
# Build backend: plain setuptools via PEP 517/518.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
# Core package metadata (PEP 621).
[project]
name = "llama_stack"
version = "0.2.15"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
requires-python = ">=3.12"
license = { text = "MIT" }
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
dependencies = [
    "aiohttp",
    "fastapi>=0.115.0,<1.0",                         # server
    "fire",                                          # for MCP in LLS client
    "httpx",
    "huggingface-hub>=0.30.0,<1.0",
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.2.15",
    "llama-api-client>=0.1.2",
    "openai>=1.66",
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",
    "pydantic>=2",
    "rich",
    "starlette",
    "termcolor",
    "tiktoken",
    "pillow",
    "h11>=0.16.0",
    "python-multipart>=0.0.20",                      # For fastapi Form
    "uvicorn>=0.34.0",                               # server
    "opentelemetry-sdk>=1.30.0",                     # server
    "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
    "aiosqlite>=0.21.0",                             # server - for metadata store
    "asyncpg",                                       # for metadata store
]
# Extras installable via `pip install llama_stack[ui]`.
[project.optional-dependencies]
ui = [
    "streamlit",
    "pandas",
    "llama-stack-client>=0.2.15",
    "streamlit-option-menu",
]
# Development-only dependency groups (PEP 735); install with `uv sync --group <name>`.
[dependency-groups]
dev = [
    "pytest>=8.4",
    "pytest-timeout",
    "pytest-asyncio>=1.0",
    "pytest-cov",
    "pytest-html",
    "pytest-json-report",
    "pytest-socket", # For blocking network access in unit tests
    "nbval",         # For notebook testing
    "black",
    "ruff",
    "types-requests",
    "types-setuptools",
    "pre-commit",
    "ruamel.yaml",   # needed for openapi generator
]
# These are the dependencies required for running unit tests.
unit = [
    "sqlite-vec",
    "openai",
    "aiosqlite",
    "aiohttp",
    "pypdf",
    "mcp",
    "chardet",
    "qdrant-client",
    "sqlalchemy",
    "sqlalchemy[asyncio]>=2.0.41",
    "blobfile",
    "faiss-cpu",
    "pymilvus>=2.5.12",
    "litellm",
    "together",
]
# These are the core dependencies required for running integration tests. They are shared across all
# providers. If a provider requires additional dependencies, please add them to your environment
# separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
# dependencies.
test = [
    "openai",
    "aiosqlite",
    "aiohttp",
    "torch>=2.6.0",
    "torchvision>=0.21.0",
    "chardet",
    "pypdf",
    "mcp",
    "datasets",
    "autoevals",
    "transformers",
    "sqlalchemy",
    "sqlalchemy[asyncio]>=2.0.41",
    "requests",
    "pymilvus>=2.5.12",
]
docs = [
    "setuptools",
    "sphinx-autobuild",
    "myst-parser",
    "sphinx",
    "sphinx-rtd-theme",
    "sphinx_rtd_dark_mode",
    "sphinx-copybutton",
    "sphinx-tabs",
    "sphinx-design",
    "sphinxcontrib.redoc",
    "sphinxcontrib.video",
    "sphinxcontrib.mermaid",
    "sphinx-reredirects",
    "tomli",
    "linkify",
    "sphinxcontrib.openapi",
    "requests",
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
[project.urls]
Homepage = "https://github.com/meta-llama/llama-stack"
# Console entry points installed with the package.
[project.scripts]
llama = "llama_stack.cli.llama:main"
install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
# Package discovery: only llama_stack and its subpackages are shipped.
[tool.setuptools.packages.find]
where = ["."]
include = ["llama_stack", "llama_stack.*"]
# Extra package index for CPU-only PyTorch wheels; `explicit` restricts it to
# packages that opt in via [tool.uv.sources].
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
# Pin torch/torchvision resolution to the CPU-only index defined above.
[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
[tool.ruff]
line-length = 120
exclude = [
    "./.git",
    "./docs/*",
    "./build",
    "./venv",
    "*.pyi",
    ".pre-commit-config.yaml",
    "*.md",
    ".flake8",
]
[tool.ruff.lint]
select = [
    "UP",      # pyupgrade
    "B",       # flake8-bugbear
    "B9",      # flake8-bugbear subset
    "C",       # comprehensions
    "E",       # pycodestyle
    "F",       # Pyflakes
    "N",       # Naming
    "W",       # Warnings
    "DTZ",     # datetime rules
    "I",       # isort (imports order)
    "RUF001",  # Checks for ambiguous Unicode characters in strings
    "RUF002",  # Checks for ambiguous Unicode characters in docstrings
    "RUF003",  # Checks for ambiguous Unicode characters in comments
    "PLC2401", # Checks for the use of non-ASCII characters in variable names
    "PLC2403", # Checks for the use of non-ASCII characters in import statements
    "PLE2510", # Checks for strings that contain the control character BS.
    "PLE2512", # Checks for strings that contain the raw control character SUB.
    "PLE2513", # Checks for strings that contain the raw control character ESC.
    "PLE2514", # Checks for strings that contain the raw control character NUL (0 byte).
    "PLE2515", # Checks for strings that contain the zero width space character.
]
ignore = [
    # The following ignores are desired by the project maintainers.
    "E402", # Module level import not at top of file
    "E501", # Line too long
    "F405", # Maybe undefined or defined from star import
    "C408", # Ignored because we like the dict keyword argument syntax
    "N812", # Ignored because import torch.nn.functional as F is PyTorch convention

    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
    "C901", # Complexity of the function is too high
]
unfixable = [
    "PLE2515",
] # Do not fix this automatically since ruff will replace the zero-width space with \u200b - let's do it manually
# Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
    "RUF001",
    "PLE2515",
]
"llama_stack/apis/**/__init__.py" = [
    "F403",
] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
[tool.mypy]
mypy_path = ["llama_stack"]
packages = ["llama_stack"]
plugins = ['pydantic.mypy']
disable_error_code = []
warn_return_any = true
# # honor excludes by not following there through imports
follow_imports = "silent"
# Note: some entries are directories, not files. This is because mypy doesn't
# respect __init__.py excludes, so the only way to suppress these right now is
# to exclude the entire directory.
exclude = [
    # As we fix more and more of these, we should remove them from the list
    "^llama_stack/cli/download\\.py$",
    "^llama_stack/distribution/build\\.py$",
    "^llama_stack/distribution/client\\.py$",
    "^llama_stack/distribution/request_headers\\.py$",
    "^llama_stack/distribution/routers/",
    "^llama_stack/distribution/routing_tables/",
    "^llama_stack/distribution/server/endpoints\\.py$",
    "^llama_stack/distribution/server/server\\.py$",
    "^llama_stack/distribution/stack\\.py$",
    "^llama_stack/distribution/store/registry\\.py$",
    "^llama_stack/distribution/utils/exec\\.py$",
    "^llama_stack/distribution/utils/prompt_for_config\\.py$",
    "^llama_stack/models/llama/llama3/chat_format\\.py$",
    "^llama_stack/models/llama/llama3/interface\\.py$",
    "^llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^llama_stack/models/llama/llama3/tool_utils\\.py$",
    "^llama_stack/providers/inline/agents/meta_reference/",
    "^llama_stack/providers/inline/agents/meta_reference/agent_instance\\.py$",
    "^llama_stack/providers/inline/agents/meta_reference/agents\\.py$",
    "^llama_stack/providers/inline/datasetio/localfs/",
    "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^llama_stack/models/llama/llama3/generation\\.py$",
    "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^llama_stack/models/llama/llama4/",
    "^llama_stack/providers/inline/inference/meta_reference/parallel_utils\\.py$",
    "^llama_stack/providers/inline/inference/meta_reference/quantization/fp8_impls\\.py$",
    "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^llama_stack/providers/inline/inference/vllm/",
    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^llama_stack/providers/inline/safety/code_scanner/",
    "^llama_stack/providers/inline/safety/llama_guard/",
    "^llama_stack/providers/inline/safety/prompt_guard/",
    "^llama_stack/providers/inline/scoring/basic/",
    "^llama_stack/providers/inline/scoring/braintrust/",
    "^llama_stack/providers/inline/scoring/llm_as_judge/",
    "^llama_stack/providers/remote/agents/sample/",
    "^llama_stack/providers/remote/datasetio/huggingface/",
    "^llama_stack/providers/remote/datasetio/nvidia/",
    "^llama_stack/providers/remote/inference/anthropic/",
    "^llama_stack/providers/remote/inference/bedrock/",
    "^llama_stack/providers/remote/inference/cerebras/",
    "^llama_stack/providers/remote/inference/databricks/",
    "^llama_stack/providers/remote/inference/fireworks/",
    "^llama_stack/providers/remote/inference/gemini/",
    "^llama_stack/providers/remote/inference/groq/",
    "^llama_stack/providers/remote/inference/nvidia/",
    "^llama_stack/providers/remote/inference/openai/",
    "^llama_stack/providers/remote/inference/passthrough/",
    "^llama_stack/providers/remote/inference/runpod/",
    "^llama_stack/providers/remote/inference/sambanova/",
    "^llama_stack/providers/remote/inference/sample/",
    "^llama_stack/providers/remote/inference/tgi/",
    "^llama_stack/providers/remote/inference/together/",
    "^llama_stack/providers/remote/inference/watsonx/",
    "^llama_stack/providers/remote/safety/bedrock/",
    "^llama_stack/providers/remote/safety/nvidia/",
    "^llama_stack/providers/remote/safety/sambanova/",
    "^llama_stack/providers/remote/safety/sample/",
    "^llama_stack/providers/remote/tool_runtime/bing_search/",
    "^llama_stack/providers/remote/tool_runtime/brave_search/",
    "^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
    "^llama_stack/providers/remote/tool_runtime/tavily_search/",
    "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
    "^llama_stack/providers/remote/post_training/nvidia/",
    "^llama_stack/providers/remote/vector_io/chroma/",
    "^llama_stack/providers/remote/vector_io/milvus/",
    "^llama_stack/providers/remote/vector_io/pgvector/",
    "^llama_stack/providers/remote/vector_io/qdrant/",
    "^llama_stack/providers/remote/vector_io/sample/",
    "^llama_stack/providers/remote/vector_io/weaviate/",
    "^llama_stack/providers/tests/conftest\\.py$",
    "^llama_stack/providers/utils/bedrock/client\\.py$",
    "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
    "^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
    "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
    "^llama_stack/providers/utils/inference/model_registry\\.py$",
    "^llama_stack/providers/utils/inference/openai_compat\\.py$",
    "^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
    "^llama_stack/providers/utils/kvstore/config\\.py$",
    "^llama_stack/providers/utils/kvstore/kvstore\\.py$",
    "^llama_stack/providers/utils/kvstore/mongodb/mongodb\\.py$",
    "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
    "^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
    "^llama_stack/providers/utils/kvstore/sqlite/sqlite\\.py$",
    "^llama_stack/providers/utils/memory/vector_store\\.py$",
    "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
    "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
    "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^llama_stack/providers/utils/telemetry/tracing\\.py$",
    "^llama_stack/strong_typing/auxiliary\\.py$",
    "^llama_stack/strong_typing/deserializer\\.py$",
    "^llama_stack/strong_typing/inspection\\.py$",
    "^llama_stack/strong_typing/schema\\.py$",
    "^llama_stack/strong_typing/serializer\\.py$",
    "^llama_stack/templates/groq/groq\\.py$",
    "^llama_stack/templates/llama_api/llama_api\\.py$",
    "^llama_stack/templates/sambanova/sambanova\\.py$",
    "^llama_stack/templates/template\\.py$",
]
[[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable.
module = ["yaml", "fire"]
ignore_missing_imports = true
# Settings for the pydantic mypy plugin enabled in [tool.mypy].
[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
[tool.ruff.lint.pep8-naming]
classmethod-decorators = ["classmethod", "pydantic.field_validator"]
[tool.pytest.ini_options]
asyncio_mode = "auto"
markers = [
    "allow_network: Allow network access for specific unit tests",
]