prototype: use pyproject and uv to build distribution

Goals:

* remove the need for a custom tool (`llama stack build`) to install a
  collection of Python packages
* use the power of `uv`, which was designed to manage dependencies
* `llama stack build` can "probably" go away and be replaced with `uv`

How-to: with the pyproject, you can install an Ollama distribution in a
virtual environment like so:

```
uv venv --python 3.10 ollama-distro
source ollama-distro/bin/activate
uv sync --extra ollama
llama stack run llama_stack/templates/ollama/run.yaml
```
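
Since each distribution is exposed as an optional-dependency extra, the
same install path should also work without uv; this is essentially what
the Containerfile below does with
`pip install --no-cache llama-stack[${TEMPLATE}]`. A minimal sketch,
assuming a published `llama-stack` package that carries the same extras:

```
pip install "llama-stack[ollama]"
```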

Caveats:

* external providers: we could still use a build file, or add the
  known external providers to the pyproject?
* growth of the uv.lock file?

We create a requirements.txt for convenience, as some users are more
familiar with this format than with reading a pyproject.
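
For example, the Ollama variant is exported straight from the lockfile;
the command recorded at the top of requirements-ollama.txt is:

```
uv export --frozen --no-hashes --no-emit-project \
  --output-file=requirements-ollama.txt --no-annotate \
  --no-default-groups --extra ollama
```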

Signed-off-by: Sébastien Han <seb@redhat.com>
Sébastien Han 2025-05-27 20:31:57 +02:00
parent 6832e8a658
commit b6ebbe1bc0
13 changed files with 5579 additions and 679 deletions

View file

@@ -5,6 +5,10 @@ inputs:
description: The Python version to use
required: false
default: "3.11"
install-ollama:
description: Install ollama
required: false
default: true
runs:
using: "composite"
steps:
@@ -17,11 +21,13 @@ runs:
- name: Install dependencies
shell: bash
env:
INSTALL_OLLAMA: ${{ inputs.install-ollama }}
if: ${{ env.INSTALL_OLLAMA == 'true' }}
run: |
uv sync --all-groups
uv pip install ollama faiss-cpu
uv sync --all-groups --extra ollama
# always test against the latest version of the client
# TODO: this is not necessarily a good idea. we need to test against both published and latest
# to find out backwards compatibility issues.
uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
uv pip install -e .

View file

@@ -33,9 +33,6 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Build Llama Stack
run: |
llama stack build --template ollama --image-type venv
- name: Install minikube
if: ${{ matrix.auth-provider == 'kubernetes' }}

View file

@@ -41,16 +41,12 @@ jobs:
- name: Setup ollama
uses: ./.github/actions/setup-ollama
- name: Build Llama Stack
run: |
uv run llama stack build --template ollama --image-type venv
- name: Start Llama Stack server in background
if: matrix.client-type == 'http'
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv --env OLLAMA_URL="http://0.0.0.0:11434" &
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --env OLLAMA_URL="http://0.0.0.0:11434" &
- name: Wait for Llama Stack server to be ready
if: matrix.client-type == 'http'

View file

@@ -36,7 +36,7 @@ jobs:
- name: Generate Template List
id: set-matrix
run: |
templates=$(ls llama_stack/templates/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
templates=$(ls llama_stack/templates/*/*build.yaml | grep -v "experimental-post-training" | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "templates=$templates" >> "$GITHUB_OUTPUT"
build:
@@ -54,16 +54,42 @@ jobs:
- name: Install dependencies
uses: ./.github/actions/setup-runner
with:
install-ollama: false
- name: Print build dependencies
- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test --print-deps-only
uv pip list
- name: Run Llama Stack Build
- name: Run Llama Stack Build - VENV
if: matrix.image-type == 'venv'
run: |
# USE_COPY_NOT_MOUNT is set to true since mounting is not supported by docker buildx, we use COPY instead
# LLAMA_STACK_DIR is set to the current directory so we are building from the source
USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --template ${{ matrix.template }} --image-type ${{ matrix.image-type }} --image-name test
uv sync --no-default-groups --extra ${{ matrix.template }}
# TODO
- name: Run Llama Stack Build - CONTAINER
if: matrix.image-type == 'container'
run: |
# TODO: use llama_stack/templates/Containerfile when we have a new release!
cat << 'EOF' > Containerfile
FROM registry.access.redhat.com/ubi9
WORKDIR /app
ARG TEMPLATE
RUN dnf -y update \
&& dnf install -y python3.11 python3.11-pip python3.11-wheel python3.11-setuptools python3.11-devel gcc make \
&& ln -s /bin/pip3.11 /bin/pip \
&& ln -s /bin/python3.11 /bin/python \
&& dnf clean all
RUN mkdir -p /.llama/providers.d /.cache
COPY . /app/llama-stack
RUN cd llama-stack && pip install --no-cache .[${TEMPLATE}]
RUN chmod -R g+rw /app /.llama /.cache
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/llama-stack/templates/${TEMPLATE}/run.yaml"]
EOF
docker build --build-arg TEMPLATE=${{ matrix.template }} -f Containerfile -t ${{ matrix.template }} .
- name: Print dependencies in the image
if: matrix.image-type == 'venv'

View file

@@ -43,7 +43,7 @@ jobs:
- name: Build HTML
run: |
cd docs
uv run make html
uv run --group docs make html
- name: Trigger ReadTheDocs build
if: github.event_name != 'pull_request'

View file

@@ -40,7 +40,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
provider_type="inline::vllm",
pip_packages=[
"vllm",
"vllm; sys_platform == 'linux'",
],
module="llama_stack.providers.inline.inference.vllm",
config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
@@ -49,8 +49,9 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
provider_type="inline::sentence-transformers",
pip_packages=[
"torch torchvision --index-url https://download.pytorch.org/whl/cpu",
"sentence-transformers --no-deps",
"torch",
"torchvision",
"sentence-transformers",
],
module="llama_stack.providers.inline.inference.sentence_transformers",
config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",

View file

@@ -0,0 +1,15 @@
# Usage:
# podman build --build-arg TEMPLATE={TEMPLATE_NAME} -f llama_stack/templates/Containerfile -t TEMPLATE_NAME .
FROM registry.access.redhat.com/ubi9
WORKDIR /app
ARG TEMPLATE
RUN dnf -y update \
&& dnf install -y python3.11 python3.11-pip python3.11-wheel python3.11-setuptools python3.11-devel gcc make \
&& ln -s /bin/pip3.11 /bin/pip \
&& ln -s /bin/python3.11 /bin/python \
&& dnf clean all
RUN mkdir -p /.llama/providers.d /.cache
RUN pip install --no-cache llama-stack[${TEMPLATE}]
RUN chmod -R g+rw /app /.llama /.cache
ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/llama-stack/templates/${TEMPLATE}/run.yaml"]

View file

@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools>=61.0"]
requires = ["setuptools>=80.0"]
build-backend = "setuptools.build_meta"
[project]
@@ -53,6 +53,866 @@ ui = [
"streamlit-option-menu",
]
#################
# DISTRIBUTIONS #
#################
bedrock = [
"aiosqlite",
"autoevals",
"boto3",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
cerebras = [
"aiosqlite",
"autoevals",
"cerebras_cloud_sdk",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
ci-tests = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
dell = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
fireworks = [
"aiosqlite",
"asyncpg",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
groq = [
"aiosqlite",
"autoevals",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
hf-endpoint = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
hf-serverless = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
llama_api = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
meta-reference-gpu = [
"accelerate",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fairscale",
"faiss-cpu",
"fastapi",
"fbgemm-gpu-genai==1.1.2",
"fire",
"httpx",
"langdetect",
"lm-format-enforcer",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchao==0.8.0",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"zmq",
]
nvidia = [
"aiohttp",
"aiosqlite",
"chardet",
"datasets",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"matplotlib",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"tqdm",
"transformers",
"uvicorn",
]
ollama = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"ollama",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"peft",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"tqdm",
"transformers",
"tree_sitter",
"trl",
"uvicorn",
]
open-benchmark = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
passthrough = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
postgres-demo = [
"aiosqlite",
"asyncpg",
"chardet",
"chromadb-client",
"fastapi",
"fire",
"httpx",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
]
remote-vllm = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
sambanova = [
"aiosqlite",
"chardet",
"chromadb-client",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"uvicorn",
]
starter = [
"aiohttp",
"aiosqlite",
"asyncpg",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"fastapi",
"fire",
"fireworks-ai",
"httpx",
"langdetect",
"litellm",
"matplotlib",
"mcp",
"nltk",
"numpy",
"ollama",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"sqlite-vec",
"together",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
tgi = [
"aiohttp",
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"huggingface_hub",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
together = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"together",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
vllm-gpu = [
"aiosqlite",
"autoevals",
"chardet",
"chromadb-client",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
"vllm; sys_platform == 'linux'",
]
watsonx = [
"aiosqlite",
"autoevals",
"chardet",
"datasets",
"emoji",
"faiss-cpu",
"fastapi",
"fire",
"httpx",
"ibm_watson_machine_learning",
"langdetect",
"matplotlib",
"mcp",
"nltk",
"numpy",
"openai",
"opentelemetry-exporter-otlp-proto-http",
"opentelemetry-sdk",
"pandas",
"pillow",
"psycopg2-binary",
"pymongo",
"pypdf",
"pythainlp",
"redis",
"requests",
"scikit-learn",
"scipy",
"sentence-transformers",
"sentencepiece",
"sqlalchemy[asyncio]",
"torch",
"torchvision",
"tqdm",
"transformers",
"tree_sitter",
"uvicorn",
]
[dependency-groups]
dev = [
"pytest",
@@ -123,7 +983,7 @@ docs = [
"linkify",
"sphinxcontrib.openapi",
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
codegen = ["rich", "pydantic", "jinja2>=3.1.6", "tomlkit"]
[project.urls]
Homepage = "https://github.com/meta-llama/llama-stack"
@@ -145,6 +1005,11 @@ explicit = true
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]
[[tool.uv.dependency-metadata]]
name = "sentence-transformers"
requires-dist = [
] # This instructs UV to not install any dependencies for this package (torch is installed by default)
[tool.ruff]
line-length = 120
exclude = [

requirements-ollama.txt (new file, 159 lines)

View file

@@ -0,0 +1,159 @@
# This file was autogenerated by uv via the following command:
# uv export --frozen --no-hashes --no-emit-project --output-file=requirements-ollama.txt --no-annotate --no-default-groups --extra ollama
accelerate==1.7.0
aiohappyeyeballs==2.5.0
aiohttp==3.11.13
aiosignal==1.3.2
aiosqlite==0.21.0
annotated-types==0.7.0
anyio==4.8.0
async-timeout==5.0.1 ; python_full_version < '3.11.3'
attrs==25.1.0
autoevals==0.0.122
backoff==2.2.1
braintrust-core==0.0.58
certifi==2025.1.31
chardet==5.2.0
charset-normalizer==3.4.1
chevron==0.14.0
chromadb-client==1.0.12
click==8.1.8
colorama==0.4.6 ; sys_platform == 'win32'
contourpy==1.3.2
cycler==0.12.1
datasets==3.3.2
deprecated==1.2.18
dill==0.3.8
distro==1.9.0
dnspython==2.7.0
ecdsa==0.19.1
emoji==2.14.1
exceptiongroup==1.2.2 ; python_full_version < '3.11'
faiss-cpu==1.11.0
fastapi==0.115.8
filelock==3.17.0
fire==0.7.0
fonttools==4.58.1
frozenlist==1.5.0
fsspec==2024.12.0
googleapis-common-protos==1.67.0
greenlet==3.2.2
grpcio==1.71.0
h11==0.16.0
hf-xet==1.1.2 ; (platform_machine == 'aarch64' and sys_platform != 'darwin') or (platform_machine == 'amd64' and sys_platform != 'darwin') or (platform_machine == 'arm64' and sys_platform != 'darwin') or (platform_machine == 'x86_64' and sys_platform != 'darwin')
httpcore==1.0.9
httpx==0.28.1
httpx-sse==0.4.0
huggingface-hub==0.29.0 ; sys_platform == 'darwin'
huggingface-hub==0.32.3 ; sys_platform != 'darwin'
idna==3.10
importlib-metadata==8.0.0 ; sys_platform != 'darwin'
importlib-metadata==8.5.0 ; sys_platform == 'darwin'
jinja2==3.1.6
jiter==0.8.2
joblib==1.5.1
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kiwisolver==1.4.8
langdetect==1.0.9
levenshtein==0.27.1
llama-stack-client==0.2.9
markdown-it-py==3.0.0
markupsafe==3.0.2
matplotlib==3.10.3
mcp==1.3.0
mdurl==0.1.2
mpmath==1.3.0
multidict==6.1.0
multiprocess==0.70.16
networkx==3.4.2
nltk==3.9.1
numpy==1.26.4
ollama==0.5.1
openai==1.71.0
opentelemetry-api==1.26.0 ; sys_platform != 'darwin'
opentelemetry-api==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-common==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-common==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-grpc==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-grpc==1.30.0 ; sys_platform == 'darwin'
opentelemetry-exporter-otlp-proto-http==1.26.0 ; sys_platform != 'darwin'
opentelemetry-exporter-otlp-proto-http==1.30.0 ; sys_platform == 'darwin'
opentelemetry-proto==1.26.0 ; sys_platform != 'darwin'
opentelemetry-proto==1.30.0 ; sys_platform == 'darwin'
opentelemetry-sdk==1.26.0 ; sys_platform != 'darwin'
opentelemetry-sdk==1.30.0 ; sys_platform == 'darwin'
opentelemetry-semantic-conventions==0.47b0 ; sys_platform != 'darwin'
opentelemetry-semantic-conventions==0.51b0 ; sys_platform == 'darwin'
orjson==3.10.18
overrides==7.7.0
packaging==24.2
pandas==2.1.4
peft==0.15.2
pillow==11.1.0
posthog==4.2.0
prompt-toolkit==3.0.50
propcache==0.3.0
protobuf==4.25.8 ; sys_platform != 'darwin'
protobuf==5.29.3 ; sys_platform == 'darwin'
psutil==7.0.0
psycopg2-binary==2.9.10
pyaml==25.1.0
pyarrow==19.0.1
pyasn1==0.4.8
pydantic==2.10.6
pydantic-core==2.27.2
pydantic-settings==2.8.1
pygments==2.19.1
pymongo==4.13.0
pyparsing==3.2.3
pypdf==5.3.1
pythainlp==5.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-jose==3.4.0
python-multipart==0.0.20
pytz==2025.1
pyyaml==6.0.2
rapidfuzz==3.12.2
redis==6.2.0
referencing==0.36.2
regex==2024.11.6
requests==2.32.2 ; (python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version >= '3.11' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
requests==2.32.3 ; (python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'darwin')
rich==13.9.4
rpds-py==0.22.3
rsa==4.9
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.3
sentencepiece==0.2.0
setuptools==80.8.0
six==1.17.0
sniffio==1.3.1
sqlalchemy==2.0.41
sse-starlette==2.2.1
starlette==0.45.3
sympy==1.13.1
tenacity==9.1.2
termcolor==2.5.0
threadpoolctl==3.6.0
tiktoken==0.9.0
tokenizers==0.21.1
torch==2.6.0 ; sys_platform == 'darwin'
torch==2.6.0+cpu ; sys_platform != 'darwin'
tqdm==4.67.1
transformers==4.50.3 ; sys_platform == 'darwin'
transformers==4.52.4 ; sys_platform != 'darwin'
tree-sitter==0.24.0
trl==0.18.1
typing-extensions==4.12.2
tzdata==2025.1
urllib3==2.1.0 ; (python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'darwin')
urllib3==2.3.0 ; (python_full_version < '3.11' and sys_platform == 'darwin') or (python_full_version >= '3.11' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
uvicorn==0.34.0
wcwidth==0.2.13
wrapt==1.17.2
xxhash==3.5.0
yarl==1.18.3
zipp==3.21.0

View file

@@ -14,8 +14,6 @@ anyio==4.8.0
# llama-stack-client
# openai
# starlette
async-timeout==5.0.1 ; python_full_version < '3.11'
# via aiohttp
attrs==25.1.0
# via
# aiohttp
@@ -40,8 +38,6 @@ distro==1.9.0
# openai
ecdsa==0.19.1
# via python-jose
exceptiongroup==1.2.2 ; python_full_version < '3.11'
# via anyio
fastapi==0.115.8
# via llama-stack
filelock==3.17.0
@@ -58,6 +54,8 @@ h11==0.16.0
# via
# httpcore
# llama-stack
hf-xet==1.1.4 ; (platform_machine == 'aarch64' and sys_platform != 'darwin') or (platform_machine == 'amd64' and sys_platform != 'darwin') or (platform_machine == 'arm64' and sys_platform != 'darwin') or (platform_machine == 'x86_64' and sys_platform != 'darwin')
# via huggingface-hub
httpcore==1.0.9
# via httpx
httpx==0.28.1
@@ -65,7 +63,9 @@ httpx==0.28.1
# llama-stack
# llama-stack-client
# openai
huggingface-hub==0.29.0
huggingface-hub==0.29.0 ; sys_platform == 'darwin'
# via llama-stack
huggingface-hub==0.33.0 ; sys_platform != 'darwin'
# via llama-stack
idna==3.10
# via
@@ -99,7 +99,7 @@ openai==1.71.0
# via llama-stack
packaging==24.2
# via huggingface-hub
pandas==2.2.3
pandas==2.1.1
# via llama-stack-client
pillow==11.1.0
# via llama-stack
@@ -147,7 +147,12 @@ referencing==0.36.2
# jsonschema-specifications
regex==2024.11.6
# via tiktoken
requests==2.32.4
requests==2.32.2 ; (python_full_version < '3.12' and sys_platform == 'darwin') or (python_full_version >= '3.12' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
# via
# huggingface-hub
# llama-stack
# tiktoken
requests==2.32.4 ; (python_full_version < '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'darwin')
# via
# huggingface-hub
# llama-stack
@@ -195,15 +200,15 @@ typing-extensions==4.12.2
# fastapi
# huggingface-hub
# llama-stack-client
# multidict
# openai
# pydantic
# pydantic-core
# referencing
# rich
tzdata==2025.1
# via pandas
urllib3==2.3.0
urllib3==2.1.0 ; (python_full_version < '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'darwin')
# via requests
urllib3==2.3.0 ; (python_full_version < '3.12' and sys_platform == 'darwin') or (python_full_version >= '3.12' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')
# via requests
wcwidth==0.2.13
# via prompt-toolkit

View file

@@ -13,8 +13,14 @@ from collections.abc import Iterable
from functools import partial
from pathlib import Path
import tomlkit
from rich.progress import Progress, SpinnerColumn, TextColumn
from llama_stack.distribution.build import (
SERVER_DEPENDENCIES,
get_provider_dependencies,
)
REPO_ROOT = Path(__file__).parent.parent
@@ -85,6 +91,24 @@ def check_for_changes(change_tracker: ChangedPathTracker) -> bool:
return has_changes
def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[str]]:
try:
module_name = f"llama_stack.templates.{template_dir.name}"
module = importlib.import_module(module_name)
if template_func := getattr(module, "get_distribution_template", None):
template = template_func()
normal_deps, special_deps = get_provider_dependencies(template)
# Combine all dependencies in order: normal deps, special deps, server deps
all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps))
return template.name, all_deps
except Exception as e:
print("Error collecting template dependencies for", template_dir, e)
return None, []
return None, []
def pre_import_templates(template_dirs: list[Path]) -> None:
# Pre-import all template modules to avoid deadlocks.
for template_dir in template_dirs:
@@ -92,6 +116,53 @@ def pre_import_templates(template_dirs: list[Path]) -> None:
importlib.import_module(module_name)
def generate_dependencies_files(change_tracker: ChangedPathTracker):
templates_dir = REPO_ROOT / "llama_stack" / "templates"
distribution_deps = {}
for template_dir in find_template_dirs(templates_dir):
print("template_dir", template_dir)
name, deps = collect_template_dependencies(template_dir)
if name:
distribution_deps[name] = deps
else:
print("No template function found for", template_dir)
# First, remove any distributions that are no longer present
pyproject_file = REPO_ROOT / "pyproject.toml"
change_tracker.add_paths(pyproject_file)
# Read and parse the current pyproject.toml content
with open(pyproject_file) as fp:
pyproject = tomlkit.load(fp)
# Get current optional dependencies
current_deps = pyproject["project"]["optional-dependencies"]
# Store ui dependencies if they exist
ui_deps = current_deps.get("ui")
# Remove distributions that are no longer present
for name in list(current_deps.keys()):
if name not in distribution_deps.keys() and name != "ui":
del current_deps[name]
# Now add/update the remaining distributions
for name, deps in distribution_deps.items():
deps_array = tomlkit.array()
for dep in sorted(deps):
deps_array.append(dep)
current_deps[name] = deps_array.multiline(True)
# Restore ui dependencies if they existed
if ui_deps is not None:
current_deps["ui"] = ui_deps
# Write back to pyproject.toml
with open(pyproject_file, "w") as fp:
tomlkit.dump(pyproject, fp)
def main():
templates_dir = REPO_ROOT / "llama_stack" / "templates"
change_tracker = ChangedPathTracker()
@@ -114,6 +185,9 @@ def main():
list(executor.map(process_func, template_dirs))
progress.update(task, advance=len(template_dirs))
# TODO: generate a Containerfile for each distribution as well?
generate_dependencies_files(change_tracker)
if check_for_changes(change_tracker):
print(
"Distribution template changes detected. Please commit the changes.",

View file

@@ -16,4 +16,4 @@ if [ $FOUND_PYTHON -ne 0 ]; then
uv python install "$PYTHON_VERSION"
fi
uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@
uv run --python "$PYTHON_VERSION" --with-editable . --group dev --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@

uv.lock (generated, 5044 lines)

File diff suppressed because it is too large.